From 3b2ae1d0a63bf177f8b3e4f8872d752d8e5682d7 Mon Sep 17 00:00:00 2001 From: njlr Date: Mon, 1 Jul 2024 19:14:00 +0100 Subject: [PATCH 1/3] Skip new-line inside of pre --- src/FSharp.Data.Html.Core/HtmlNode.fs | 9 ++++++--- tests/FSharp.Data.Core.Tests/HtmlParser.fs | 12 ++++++++++++ 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/src/FSharp.Data.Html.Core/HtmlNode.fs b/src/FSharp.Data.Html.Core/HtmlNode.fs index 115b63ee6..08fc38f95 100644 --- a/src/FSharp.Data.Html.Core/HtmlNode.fs +++ b/src/FSharp.Data.Html.Core/HtmlNode.fs @@ -132,7 +132,10 @@ type HtmlNode = | HtmlText _ -> true | _ -> false) - if canAddNewLine && not onlyText then newLine 0 + let isPreTag = name = "pre" + + if canAddNewLine && not (onlyText || isPreTag) then newLine 0 + append "<" append name @@ -150,14 +153,14 @@ type HtmlNode = appendEndTag name else append ">" - if not onlyText then newLine 2 + if not (onlyText || isPreTag) then newLine 2 let mutable canAddNewLine = false for element in elements do serialize sb (indentation + 2) canAddNewLine element canAddNewLine <- true - if not onlyText then newLine 0 + if not (onlyText || isPreTag) then newLine 0 appendEndTag name | HtmlText str -> append str | HtmlComment str -> diff --git a/tests/FSharp.Data.Core.Tests/HtmlParser.fs b/tests/FSharp.Data.Core.Tests/HtmlParser.fs index fe9a26fc2..392628f2a 100644 --- a/tests/FSharp.Data.Core.Tests/HtmlParser.fs +++ b/tests/FSharp.Data.Core.Tests/HtmlParser.fs @@ -857,6 +857,18 @@ let ``Drops whitespace outside pre``() = let expected = $"
%s{nl} foo
    bar    
baz%s{nl}
" result |> should equal expected +[<Test>] +let ``Maintain whitespace inside pre tag through round-trip``() = + let html = """<pre>
+Line 1
+Line 2
+Line 3</pre>
""" + + let result = HtmlDocument.Parse(html).ToString() + + let expected = html + result |> should equal expected + [] let ``Can parse national rail mobile site correctly``() = HtmlDocument.Load "UKDepartures.html" From 48efd347e8c98693488f6d36c9837332bf471af1 Mon Sep 17 00:00:00 2001 From: njlr Date: Mon, 1 Jul 2024 19:16:21 +0100 Subject: [PATCH 2/3] Format --- src/FSharp.Data.Html.Core/HtmlNode.fs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/FSharp.Data.Html.Core/HtmlNode.fs b/src/FSharp.Data.Html.Core/HtmlNode.fs index 08fc38f95..3f3199dd8 100644 --- a/src/FSharp.Data.Html.Core/HtmlNode.fs +++ b/src/FSharp.Data.Html.Core/HtmlNode.fs @@ -134,7 +134,8 @@ type HtmlNode = let isPreTag = name = "pre" - if canAddNewLine && not (onlyText || isPreTag) then newLine 0 + if canAddNewLine && not (onlyText || isPreTag) then + newLine 0 append "<" append name From 1e9e6ba86855baf17be373c340f40170ab1e8d03 Mon Sep 17 00:00:00 2001 From: njlr Date: Wed, 3 Jul 2024 18:09:24 +0100 Subject: [PATCH 3/3] Update HtmlProvider docs --- docs/library/HtmlProvider.fsx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/library/HtmlProvider.fsx b/docs/library/HtmlProvider.fsx index ff553f6e9..36877c8bb 100644 --- a/docs/library/HtmlProvider.fsx +++ b/docs/library/HtmlProvider.fsx @@ -66,7 +66,7 @@ The `Load` method allows reading the data from a file or web resource. We could The following sample calls the `Load` method with an URL that points to a live version of the same page on wikipedia. 
*) // Download the table for the 2017 F1 calendar from Wikipedia -let f1Calendar = F1_2017.Load(F1_2017_URL).Tables.``Season calendaredit`` +let f1Calendar = F1_2017.Load(F1_2017_URL).Tables.``Season calendar`` // Look at the top row, being the first race of the calendar let firstRow = f1Calendar.Rows |> Seq.head @@ -146,7 +146,7 @@ let doctorWho = new HtmlProvider() // Get the average number of viewers for each doctor's series run let viewersByDoctor = - doctorWho.Tables.``Season 1 (1963-1964) edit``.Rows + doctorWho.Tables.``Season 1 (1963-1964)``.Rows |> Seq.groupBy (fun season -> season.``Directed by``) |> Seq.map (fun (doctor, seasons) -> let averaged =