Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Hopefully fix unicodes and indented pretty printing #1581

Merged
merged 24 commits into from
Feb 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
d34124e
Hopefully fix unicodes and indented pretty printing
aaruni96 Jan 29, 2024
d600020
Look after the case of splitting lines with unicode
aaruni96 Jan 29, 2024
811259f
Fix indentation
aaruni96 Jan 29, 2024
33ccfd5
Use `collect` to build strings
aaruni96 Jan 29, 2024
eee85f5
Add some explaining comments
aaruni96 Jan 29, 2024
8efc22d
Normalize string before trying to use it
aaruni96 Jan 29, 2024
c8045b7
Add comment about new dispatch for Unicode.graphemes
aaruni96 Jan 29, 2024
76ded33
Bugfix: use the right string for spillover, add more tests.
aaruni96 Jan 30, 2024
6dfa52f
Use graphemes()
aaruni96 Feb 2, 2024
aaa8adc
Update src/PrettyPrinting.jl
aaruni96 Feb 2, 2024
f6a4834
Add test for "evil a"
aaruni96 Feb 5, 2024
b4a7478
Merge branch 'ak96/indented_unicodes' of github.com:aaruni96/Abstract…
aaruni96 Feb 5, 2024
9d6ae07
Avoid a crash for graphemes with textwidth > 1
aaruni96 Feb 5, 2024
018a45c
Bugfix: use length of iterator, not string
aaruni96 Feb 8, 2024
ac382ea
Cosmetic: fix indentation of a testcase
aaruni96 Feb 8, 2024
c0e77bb
Account for unicode characters of width 2
aaruni96 Feb 8, 2024
f7b2616
Remove unnecessary println()s
aaruni96 Feb 8, 2024
0530ed8
Collect restiter once, outside loop
aaruni96 Feb 8, 2024
f985a0f
Add comment about the graphemes business
aaruni96 Feb 8, 2024
aea371b
Deal with lowercasefirst once at the beginning
aaruni96 Feb 8, 2024
24ca34d
Expand code coverage, fix array slice
aaruni96 Feb 12, 2024
b8a9e49
save width to a variable (instead of calculating it twice)
aaruni96 Feb 16, 2024
0d661d8
Include more review suggestions
aaruni96 Feb 16, 2024
be44068
Move lowercase checking further up
aaruni96 Feb 16, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 76 additions & 13 deletions src/PrettyPrinting.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1584,30 +1584,93 @@
# printed to an IOBuffer for comparisons
c = _isbuffer(io) && !io.force_newlines ? typemax(Int) : displaysize(io)[2]
ind = io.indent_level * textwidth(io.indent_str)
limit = c - ind > 0 ? c - ind : c
# there might be already something written
if c - ind - io.printed < 0
spaceleft = mod(c - ind - io.printed, c)
else
spaceleft = c - ind - io.printed
end
#@show spaceleft
firstlen = min(spaceleft, length(str))
firststr = str[1:firstlen]
if io.lowercasefirst
written += write(io.io, lowercasefirst(firststr))
io.lowercasefirst = false
else
written += write(io.io, firststr)
io.lowercasefirst = false
str = lowercasefirst(str)
io.lowercasefirst = false
end
io.printed += textwidth(firststr)
reststr = str[firstlen + 1:end]
it = Iterators.partition(1:textwidth(reststr), c - ind > 0 ? c - ind : c)
for i in it
# The following code deals with line wrapping of Unicode text, including
aaruni96 marked this conversation as resolved.
Show resolved Hide resolved
# double-width symbols and more.
_graphemes = Base.Unicode.graphemes(str)
aaruni96 marked this conversation as resolved.
Show resolved Hide resolved
firstlen = min(spaceleft, length(_graphemes))
# make an iterator over valid indices
firstiter = Base.Iterators.take(_graphemes, firstlen)
restiter = Base.Iterators.drop(_graphemes, firstlen)
aaruni96 marked this conversation as resolved.
Show resolved Hide resolved
firststr = join(firstiter)
aaruni96 marked this conversation as resolved.
Show resolved Hide resolved
width = textwidth(firststr)
if length(firstiter) == width
written += write(io.io, firststr)
io.printed += width
else
# the first line is wider than its number of graphemes
partcollect = collect(firstiter)
printstr = ""
j = 1
width = 0
while width < (limit)
printstr *= partcollect[j]
j += 1
width += textwidth(partcollect[j])
if j > length(partcollect)
break
end
end
written += write(io.io, printstr)
io.printed += width

#the spillover string
written += write(io.io, "\n")
written += write_indent(io)
written += write(io.io, reststr[i])
io.printed = textwidth(reststr[i])
printstr = join(collect(firstiter)[j:end])
written += write(io.io, printstr)
io.printed += textwidth(printstr)
end
it = Iterators.partition(1:length(restiter), limit)
restcollect = collect(restiter)
for i in it
# partitions of the spillover text
partcollect = restcollect[i]
partstr = join(partcollect)
width = textwidth(partstr)
if width < (limit) || length(i) == width
written += write(io.io, "\n")
written += write_indent(io)
written += write(io.io, partstr)
io.printed = width
else
# the display width is more than the number of graphemes,
# so this partition can spill over onto at most one extra line
# (assuming a non-standard grapheme width can only be 2)
# (see https://github.com/alacritty/alacritty/issues/265#issue-199665364 )
printstr = ""
j = 1
while textwidth(printstr) < (limit)
printstr *= partcollect[j]
j += 1
if j > length(partcollect)
break
end
end
written += write(io.io, "\n")
written += write_indent(io)
written += write(io.io, printstr)
io.printed = textwidth(printstr)
# print the second part
# there are at most two parts due to our assumption
# that no grapheme exceeds double width
printstr = join(partcollect[j:end])
aaruni96 marked this conversation as resolved.
Show resolved Hide resolved
written += write(io.io, "\n")

Check warning on line 1669 in src/PrettyPrinting.jl

View check run for this annotation

Codecov / codecov/patch

src/PrettyPrinting.jl#L1665-L1669

Added lines #L1665 - L1669 were not covered by tests
written += write_indent(io)
written += write(io.io, printstr)
io.printed = textwidth(printstr)
end

Check warning on line 1673 in src/PrettyPrinting.jl

View check run for this annotation

Codecov / codecov/patch

src/PrettyPrinting.jl#L1671-L1673

Added lines #L1671 - L1673 were not covered by tests
end
return written
end
Expand Down
143 changes: 143 additions & 0 deletions test/PrettyPrinting-test.jl
Original file line number Diff line number Diff line change
Expand Up @@ -415,6 +415,30 @@ let
" test\n" *
" test"

# Test unicode
io = IOBuffer()
io = AbstractAlgebra.pretty(io, force_newlines = true)
println(io, "testing unicode")
print(io, AbstractAlgebra.Indent(), "ŎŚĊĂŖ")
@test String(take!(io)) == "testing unicode\n" *
" ŎŚĊĂŖ"

# Test evil unicodes
io = IOBuffer()
io = AbstractAlgebra.pretty(io, force_newlines = true)
_, c = displaysize(io)
print(io, AbstractAlgebra.Indent())
ellipses = String([0xe2, 0x80, 0xa6])
wedge = String([0xe2, 0x88, 0xa7])
iacute = String([0xc3, 0xad])
str = wedge ^25 * ellipses^25 * iacute^50
print(io, "aa", str)
@test String(take!(io)) == " aa∧∧∧∧∧∧∧∧∧∧∧∧∧∧∧∧∧∧∧∧∧∧∧∧∧" *
"…………………………………………………………………" *
"íííííííííííííííííííííííííí\n" *
" íííííííííííííííííííííííí"


# Test string longer than width
io = IOBuffer()
io = AbstractAlgebra.pretty(io, force_newlines = true)
Expand All @@ -431,6 +455,125 @@ let
" aa" * "t"^(c - 6) * "\n" *
" tttttt"

# Test unicode string longer than width
io = IOBuffer()
io = AbstractAlgebra.pretty(io, force_newlines = true)
_, c = displaysize(io)
print(io, AbstractAlgebra.Indent())
println(io, "Ŏ"^c)
println(io, "aa", "Ś"^c)
print(io, AbstractAlgebra.Indent())
print(io, "aa", "Ŗ"^c)
@test String(take!(io)) == " " * "Ŏ"^(c-2) * "\n" *
" ŎŎ" * "\n" *
" aa" * "Ś"^(c-4) * "\n" *
" ŚŚŚŚ" * "\n" *
" aa" * "Ŗ"^(c-6) * "\n" *
" ŖŖŖŖŖŖ"

# Test evil unicode string much longer than width
io = IOBuffer()
io = AbstractAlgebra.pretty(io, force_newlines = true)
_, c = displaysize(io)
ellipses = String([0xe2, 0x80, 0xa6])
wedge = String([0xe2, 0x88, 0xa7])
iacute = String([0xc3, 0xad])
evil_a = String([0x61, 0xcc, 0x81, 0xcc, 0xa7, 0xcc, 0xa7])
print(io, AbstractAlgebra.Indent())
println(io, "Ŏ"^c)
println(io, ellipses^c)
println(io, "aa", "Ś"^c)
println(io, "bb", wedge^c)
print(io, AbstractAlgebra.Indent())
println(io, "aa", "Ŗ"^c)
print(io, iacute^c)
println(io, evil_a^c)
print(io, evil_a^c)
@test String(take!(io)) == " " * "Ŏ"^(c-2) * "\n" *
" ŎŎ" * "\n" *
" " * ellipses^(c-2) * "\n" *
" " * ellipses^2 * "\n" *
" aa" * "Ś"^(c-4) * "\n" *
" ŚŚŚŚ" * "\n" *
" bb" * wedge^(c-4) * "\n" *
" " * wedge^4 * "\n" *
" aa" * "Ŗ"^(c-6) * "\n" *
" ŖŖŖŖŖŖ" * "\n" *
" " * iacute^(c-4) * "\n" *
" " * iacute^4 * evil_a^(c-8) * "\n" *
" " * evil_a^(8) * "\n" *
" " * evil_a^(c-4) * "\n" *
" " * evil_a^4

# Test graphemes with non standard width
io = IOBuffer()
io = AbstractAlgebra.pretty(io, force_newlines = true)
_, c = displaysize(io)
boat = String([0xe2, 0x9b, 0xb5])
family = String([0xf0, 0x9f, 0x91, 0xaa])
print(io, AbstractAlgebra.Indent())
println(io, (boat * family)^40)
print(io, (boat * family)^40)
@test String(take!(io)) == " " * (boat*family)^19 * boat * "\n" *
" " * (family*boat)^19 * family * "\n" *
" " * boat * family * "\n" *
" " * (boat*family)^19 * boat * "\n" *
" " * (family*boat)^19 * family * "\n" *
" " * boat * family

# Test graphemes with standard and non standard width mixed in
io = IOBuffer()
io = AbstractAlgebra.pretty(io, force_newlines = true)
_, c = displaysize(io)
ellipses = String([0xe2, 0x80, 0xa6])
wedge = String([0xe2, 0x88, 0xa7])
iacute = String([0xc3, 0xad])
evil_a = String([0x61, 0xcc, 0x81, 0xcc, 0xa7, 0xcc, 0xa7])
boat = String([0xe2, 0x9b, 0xb5])
family = String([0xf0, 0x9f, 0x91, 0xaa])
print(io, AbstractAlgebra.Indent())
println(io, "Ŏ"^c)
println(io, ellipses^c)
println(io, "aa", "Ś"^c)
println(io, boat^(3*c))
println(io, "bb", wedge^c)
print(io, AbstractAlgebra.Indent())
println(io, "aa", "Ŗ"^c)
println(io, family^(3*c))
println(io, iacute^c)
println(io, evil_a^c)
print(io, evil_a^c)
@test String(take!(io)) == " " * "Ŏ"^(c-2) * "\n" *
" ŎŎ" * "\n" *
" " * ellipses^(c-2) * "\n" *
" " * ellipses^2 * "\n" *
" aa" * "Ś"^(c-4) * "\n" *
" ŚŚŚŚ" * "\n" *
" " * boat^39 * "\n" *
" " * boat^39 * "\n" *
" " * boat^39 * "\n" *
" " * boat^39 * "\n" *
" " * boat^39 * "\n" *
" " * boat^39 * "\n" *
" " * boat^6 * "\n" *
" bb" * wedge^(c-4) * "\n" *
" " * wedge^4 * "\n" *
" aa" * "Ŗ"^(c-6) * "\n" *
" ŖŖŖŖŖŖ" * "\n" *
" " * family^38 * "\n" *
" " * family^38 * "\n" *
" " * family^38 * "\n" *
" " * family^38 * "\n" *
" " * family^38 * "\n" *
" " * family^38 * "\n" *
" " * family^12 * "\n" *
" " * iacute^(c-4) * "\n" *
" " * iacute^4 *"\n" *
" " * evil_a^(c-4) * "\n" *
" " * evil_a^(4) * "\n" *
" " * evil_a^(c-4) * "\n" *
" " * evil_a^4

# Test too much indentation
io = IOBuffer()
io = AbstractAlgebra.pretty(io, force_newlines = true)
Expand Down
Loading