From 812c16d7891908e2f45761fa25e7cf29686f78ad Mon Sep 17 00:00:00 2001 From: Lucas Satabin Date: Sat, 27 Jul 2024 15:26:52 +0200 Subject: [PATCH] Emit an error if row and header size mismatch --- .../src/main/scala/fs2/data/csv/package.scala | 45 ++++++++++++++++ .../scala/fs2/data/csv/RowGeneratorTest.scala | 51 +++++++++++++++++++ site/documentation/csv/index.md | 4 +- 3 files changed, 98 insertions(+), 2 deletions(-) create mode 100644 csv/shared/src/test/scala/fs2/data/csv/RowGeneratorTest.scala diff --git a/csv/shared/src/main/scala/fs2/data/csv/package.scala b/csv/shared/src/main/scala/fs2/data/csv/package.scala index 13c250f05..0320bd9bb 100644 --- a/csv/shared/src/main/scala/fs2/data/csv/package.scala +++ b/csv/shared/src/main/scala/fs2/data/csv/package.scala @@ -198,6 +198,10 @@ package object csv { } /** Encode a specified type into a CSV prepending the given headers. */ + @deprecated( + message = + "Emits incorrect data if rows have a different length than headers. Please use `encodeWithGivenHeaders` instead.", + since = "fs2-data 1.11.1") def encodeGivenHeaders[T]: PartiallyAppliedEncodeGivenHeaders[T] = new PartiallyAppliedEncodeGivenHeaders[T](dummy = true) @@ -217,6 +221,27 @@ package object csv { } } + /** Encode a specified type into a CSV prepending the given headers. 
*/ + def encodeWithGivenHeaders[T]: PartiallyAppliedEncodeWithGivenHeaders[T] = + new PartiallyAppliedEncodeWithGivenHeaders[T](dummy = true) + + @nowarn + class PartiallyAppliedEncodeWithGivenHeaders[T](val dummy: Boolean) extends AnyVal { + def apply[F[_], Header](headers: NonEmptyList[Header], + fullRows: Boolean = false, + separator: Char = ',', + newline: String = "\n", + escape: EscapeMode = EscapeMode.Auto)(implicit + F: RaiseThrowable[F], + T: RowEncoder[T], + H: WriteableHeader[Header]): Pipe[F, T, String] = { + val stringPipe = + if (fullRows) lowlevel.toRowStrings[F](separator, newline, escape) + else lowlevel.toStrings[F](separator, newline, escape) + lowlevel.encode[F, T] andThen lowlevel.writeWithGivenHeaders(headers) andThen stringPipe + } + } + /** Encode a specified type into a CSV that contains the headers determined by encoding the first element. Empty if input is. */ def encodeUsingFirstHeaders[T]: PartiallyAppliedEncodeUsingFirstHeaders[T] = new PartiallyAppliedEncodeUsingFirstHeaders(dummy = true) @@ -316,10 +341,30 @@ package object csv { } /** Encode a given type into CSV rows using a set of explicitly given headers. */ + @deprecated( + message = + "Emits incorrect data if rows have a different length than headers. Please use `writeWithGivenHeaders` instead.", + since = "fs2-data 1.11.1") def writeWithHeaders[F[_], Header](headers: NonEmptyList[Header])(implicit H: WriteableHeader[Header]): Pipe[F, Row, NonEmptyList[String]] = Stream(H(headers)) ++ _.map(_.values) + /** Encode a given type into CSV rows using a set of explicitly given headers. */ + def writeWithGivenHeaders[F[_], Header](headers: NonEmptyList[Header])(implicit + F: RaiseThrowable[F], + H: WriteableHeader[Header]): Pipe[F, Row, NonEmptyList[String]] = + attemptWriteWithGivenHeaders(headers).apply(_).rethrow + + /** Encode a given type into CSV rows using a set of explicitly given headers, but signals errors as values. 
*/ + def attemptWriteWithGivenHeaders[F[_], Header](headers: NonEmptyList[Header])(implicit + H: WriteableHeader[Header]): Pipe[F, Row, Either[CsvException, NonEmptyList[String]]] = { + val headerSize = headers.size + Stream(Right(H(headers))) ++ _.map { row => + val rowSize = row.size + if (rowSize == headerSize) Right(row.values) else Left(new HeaderSizeError(headerSize, rowSize, row.line)) + } + } + /** Encode a given type into CSV rows without headers. */ def writeWithoutHeaders[F[_]]: Pipe[F, Row, NonEmptyList[String]] = _.map(_.values) diff --git a/csv/shared/src/test/scala/fs2/data/csv/RowGeneratorTest.scala b/csv/shared/src/test/scala/fs2/data/csv/RowGeneratorTest.scala new file mode 100644 index 000000000..96afb5846 --- /dev/null +++ b/csv/shared/src/test/scala/fs2/data/csv/RowGeneratorTest.scala @@ -0,0 +1,51 @@ +/* + * Copyright 2024 fs2-data Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package fs2.data.csv + +import cats.data.NonEmptyList +import weaver.* + +object RowGeneratorTest extends SimpleIOSuite { + + pureTest("Emit error on wrong row size (#621)") { + val input = List( + Row(NonEmptyList.of("a", "b", "c"), Some(1)), + Row(NonEmptyList.of("d", "e"), Some(2)), + Row(NonEmptyList.of("f", "g", "h", "i"), Some(3)), + Row(NonEmptyList.of("j", "k", "l"), Some(4)) + ) + val headers = NonEmptyList.of("first", "second", "third") + + val result = fs2.Stream.emits(input).through(lowlevel.attemptWriteWithGivenHeaders(headers)).compile.toList + + matches(result) { + case List( + Right(NonEmptyList("first", "second" :: "third" :: Nil)), + Right(NonEmptyList("a", "b" :: "c" :: Nil)), + Left(e1: HeaderSizeError), + Left(e2: HeaderSizeError), + Right(NonEmptyList("j", "k" :: "l" :: Nil)) + ) => + expect.all(e1.expectedColumns == 3, + e1.actualColumns == 2, + e1.line == Some(2L), + e2.expectedColumns == 3, + e2.actualColumns == 4, + e2.line == Some(3L)) + } + } +} diff --git a/site/documentation/csv/index.md b/site/documentation/csv/index.md index 9025e2d06..dea202583 100644 --- a/site/documentation/csv/index.md +++ b/site/documentation/csv/index.md @@ -51,7 +51,7 @@ More high-level pipes are available for the following use cases: * `decodeGivenHeaders` for CSV parsing that requires headers, but they aren't present in the input * `decodeUsingHeaders` for CSV parsing that requires headers and they're present in the input * `encodeWithoutHeaders` for CSV encoding that works entirely without headers (Note: requires `RowEncoder` instead of `CsvRowEncoder`) -* `encodeGivenHeaders` for CSV encoding that works without headers, but they should be added to the output +* `encodeWithGivenHeaders` for CSV encoding that works without headers, but they should be added to the output * `encodeUsingFirstHeaders` for CSV encoding that works with headers. Uses the headers of the first row for the output. 
### Dealing with erroneous files @@ -219,7 +219,7 @@ testRows .string ``` -If you want to write headers, use `writeWithHeaders` or, in case you use `CsvRow`, `encodeRowWithFirstHeaders`. For writing non-String headers, you'll need to provide an instance of `WritableHeader`, a type class analog to `ParseableHeader`. +If you want to write headers, use `writeWithGivenHeaders` or, in case you use `CsvRow`, `encodeRowWithFirstHeaders`. For writing non-String headers, you'll need to provide an instance of `WriteableHeader`, a type class analogous to `ParseableHeader`. ## The type classes: Decoders and Encoders