From 3dd23c4fe1e9cf494270aa71536d49402a4e3f80 Mon Sep 17 00:00:00 2001
From: Guido Flohr
Date: Wed, 13 Dec 2023 11:10:34 +0200
Subject: [PATCH 1/2] clean discount automatic links

Discount considers punctuation like full stops, exclamation marks and
so on part of the href of the link. Since this is almost always
incorrect, we remove such trailing characters from all links.
---
 lib/Qgoda/HTMLFilter/CleanUp.pm | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/lib/Qgoda/HTMLFilter/CleanUp.pm b/lib/Qgoda/HTMLFilter/CleanUp.pm
index bab9914..7081829 100644
--- a/lib/Qgoda/HTMLFilter/CleanUp.pm
+++ b/lib/Qgoda/HTMLFilter/CleanUp.pm
@@ -40,4 +40,36 @@ sub comment {
     return '';
 }
 
+# Handler for start tags.  Discount includes trailing punctuation in the
+# href of automatic links.  For <a> tags whose href ends in one of
+# "!,.:;?" that character is stripped from the href and the tag is rebuilt
+# from its attributes.  All other tags are returned unchanged.
+sub start {
+    my ($self, $chunk, %args) = @_;
+
+    if ('a' eq lc $args{tagname}
+        && defined $args{attr}->{href}
+        && $args{attr}->{href} =~ s/[!,.:;?]$//) {
+        $chunk = '<' . $args{tagname};
+
+        my $attrseq = $args{attrseq};
+        my $attr = $args{attr};
+        foreach my $key (@$attrseq) {
+            my $value = $attr->{$key};
+
+            # Escape what would otherwise break a double-quoted value.
+            my %escapes = (
+                '"' => '&quot;',
+                '&' => '&amp;'
+            );
+            $value =~ s/(["&])/$escapes{$1}/g;
+            $chunk .= qq{ $key="$value"};
+        }
+
+        $chunk .= '>';
+    }
+
+    return $chunk;
+}
+
 1;

From e634033de39f439f1d41025d72ff38f72cf388a5 Mon Sep 17 00:00:00 2001
From: Guido Flohr
Date: Wed, 13 Dec 2023 11:33:00 +0200
Subject: [PATCH 2/2] do not discard the interpunction

---
 lib/Qgoda/HTMLFilter/CleanUp.pm | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/lib/Qgoda/HTMLFilter/CleanUp.pm b/lib/Qgoda/HTMLFilter/CleanUp.pm
index 7081829..7fff821 100644
--- a/lib/Qgoda/HTMLFilter/CleanUp.pm
+++ b/lib/Qgoda/HTMLFilter/CleanUp.pm
@@ -49,7 +49,9 @@ sub start {
 
     if ('a' eq lc $args{tagname}
         && defined $args{attr}->{href}
-        && $args{attr}->{href} =~ s/[!,.:;?]$//) {
+        && $args{attr}->{href} =~ s/([!,.:;?])$//) {
+        # Keep the stripped character so that end() can re-emit it.
+        $self->{__interpunction} = $1;
         $chunk = '<' . $args{tagname};
 
         my $attrseq = $args{attrseq};
@@ -72,4 +74,18 @@ sub start {
     return $chunk;
 }
 
+# Handler for end tags.  Re-emits the punctuation character that start()
+# stripped from an href: after the closing </a>, or in front of any other
+# end tag if that one is seen first.  The stored character is consumed.
+sub end {
+    my ($self, $chunk, %args) = @_;
+
+    my $interpunction = delete $self->{__interpunction} // '';
+    if ('a' eq lc $args{tagname}) {
+        return $chunk . $interpunction;
+    } else {
+        return $interpunction . $chunk;
+    }
+}
+
 1;