From d68f7855df41555823a8186a87b882b245827689 Mon Sep 17 00:00:00 2001 From: Casey Forbes Date: Tue, 12 Dec 2017 16:07:58 -0500 Subject: [PATCH] CharSourceStrscan does not work correctly with UTF-8 strings. Remove it. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CharSourceStrscan expects characters to be 1 byte, so strange things happen. For example, rendering: ```

ö a

``` In Ruby 1.9.x: ```

ö <strong>a</strong>

``` In Ruby 2.1 and above: ``` maruku/lib/maruku/input/parse_span.rb:32:in `read_span': invalid byte sequence in UTF-8 (ArgumentError) ``` --- lib/maruku/input/charsource.rb | 163 +-------------------------------- 1 file changed, 1 insertion(+), 162 deletions(-) diff --git a/lib/maruku/input/charsource.rb b/lib/maruku/input/charsource.rb index b07afa7..504b669 100644 --- a/lib/maruku/input/charsource.rb +++ b/lib/maruku/input/charsource.rb @@ -1,25 +1,7 @@ require 'strscan' module MaRuKu::In::Markdown::SpanLevelParser - - # a string scanner coded by me - class CharSourceManual; end - - # a wrapper around StringScanner - class CharSourceStrscan; end - - # A debug scanner that checks the correctness of both - # by comparing their output - class CharSourceDebug; end - - # Choose! - - CharSource = CharSourceManual # faster! 58ms vs. 65ms - #CharSource = CharSourceStrscan # Faster on LONG documents. But StringScanner is buggy in Rubinius - #CharSource = CharSourceDebug - - - class CharSourceManual + class CharSource def initialize(s, parent=nil) raise "Passed #{s.class}" if not s.kind_of? String @buffer = s @@ -133,147 +115,4 @@ def describe_pos(buffer, buffer_index) buffer.gsub(/^/, ">") end end - - class CharSourceStrscan - - def initialize(s, parent=nil) - @scanner = StringScanner.new(s) - @size = s.size - end - - # Return current char as a String (or nil). - def cur_char - @scanner.peek(1)[0] - end - - # Return the next n chars as a String. - def cur_chars(n) - @scanner.peek(n) - end - - # Return the char after current char as a String (or nil). - def next_char - @scanner.peek(2)[1] - end - - # Return a character as a String, advancing the pointer. 
- def shift_char - @scanner.getch[0] - end - - # Advance the pointer - def ignore_char - @scanner.getch - end - - # Advance the pointer by n - def ignore_chars(n) - n.times { @scanner.getch } - end - - # Return the rest of the string - def current_remaining_buffer - @scanner.rest - end - - # Returns true if string matches what we're pointing to - def cur_chars_are(string) - @scanner.peek(string.size) == string - end - - # Returns true if Regexp r matches what we're pointing to - def next_matches(r) - @scanner.check(r) - end - - def read_regexp(r) - r.match(@scanner.scan(r)) - end - - def consume_whitespace - @scanner.skip(/\s+/) - end - - def describe - len = 75 - num_before = [len/2, @scanner.pos].min - num_after = [len/2, @scanner.rest_size].min - num_before_max = @scanner.pos - num_after_max = @scanner.rest_size - - num_before = [num_before_max, len - num_after].min - num_after = [num_after_max, len - num_before].min - - index_start = [@scanner.pos - num_before, 0].max - index_end = [@scanner.pos + num_after, @size].min - - size = index_end - index_start - - str = @scanner.string[index_start, size] - str.gsub!("\n", 'N') - str.gsub!("\t", 'T') - - if index_end == @size - str += "EOF" - end - - pre_s = @scanner.pos - index_start - pre_s = [pre_s, 0].max - pre_s2 = [len-pre_s, 0].max - pre = " " * pre_s - - "-" * len + "\n" + - str + "\n" + - "-" * pre_s + "|" + "-" * pre_s2 + "\n" + - pre + "+--- Byte #{@scanner.pos}\n" + - "Shown bytes [#{index_start} to #{size}] of #{@size}:\n" + - @scanner.string.gsub(/^/, ">") - end - end - - class CharSourceDebug - def initialize(s, parent) - @a = CharSourceManual.new(s, parent) - @b = CharSourceStrscan.new(s, parent) - end - - def method_missing(methodname, *args) - a_bef = @a.describe - b_bef = @b.describe - - a = @a.send(methodname, *args) - b = @b.send(methodname, *args) - - if a.kind_of? 
MatchData - if a.to_a != b.to_a - puts "called: #{methodname}(#{args})" - puts "Matchdata:\na = #{a.to_a.inspect}\nb = #{b.to_a.inspect}" - puts "AFTER: " + @a.describe - puts "AFTER: " + @b.describe - puts "BEFORE: " + a_bef - puts "BEFORE: " + b_bef - puts caller.join("\n") - exit - end - else - if a != b - puts "called: #{methodname}(#{args})" - puts "Attenzione!\na = #{a.inspect}\nb = #{b.inspect}" - puts "" + @a.describe - puts "" + @b.describe - puts caller.join("\n") - exit - end - end - - if @a.cur_char != @b.cur_char - puts "Fuori sincronia dopo #{methodname}(#{args})" - puts "" + @a.describe - puts "" + @b.describe - exit - end - - return a - end - end end