From b7815ac3ee5afdb65b2fe431e5b080a713b0a8dc Mon Sep 17 00:00:00 2001 From: Stan Lo Date: Thu, 7 Dec 2023 14:32:57 +0000 Subject: [PATCH 1/4] Improve command input recognition Currently, we simply split the input on whitespace and use the first word as the command name to check if it's a command. But this means that assigning a local variable which's name is the same as a command will also be recognized as a command. For example, in the following case, `info` is recognized as a command: ``` irb(main):001> info = 123 `debug` command is only available when IRB is started with binding.irb => nil ``` This commit improves the command input recognition by using more sophis- ticated regular expressions. --- lib/irb.rb | 36 +++++++++++++++---- lib/irb/cmd/ls.rb | 2 +- ...debug_cmd.rb => test_debug_integration.rb} | 19 +++++++++- 3 files changed, 48 insertions(+), 9 deletions(-) rename test/irb/{test_debug_cmd.rb => test_debug_integration.rb} (94%) diff --git a/lib/irb.rb b/lib/irb.rb index a2a3a7190..6ac0c49f5 100644 --- a/lib/irb.rb +++ b/lib/irb.rb @@ -1102,13 +1102,33 @@ def each_top_level_statement end end + COMMAND_FLAG_REGEXP = /(?-[a-zA-Z]+( +\S+)*)/ + COMMAND_ARG_REGEXP = /(?[^-]\S*)/ + COMMAND_NAME_REGEXP = /(?\S+)/ + SIMPLE_COMMAND_REGEXP = /^#{COMMAND_NAME_REGEXP}$/ + COMMAND_WITH_ARGS_REGEXP = /^#{COMMAND_NAME_REGEXP} +#{COMMAND_ARG_REGEXP}$/ + COMMAND_WITH_FLAGS_REGEXP = /^#{COMMAND_NAME_REGEXP} +#{COMMAND_FLAG_REGEXP}$/ + COMMAND_WITH_ARGS_AND_FLAGS_REGEXP = /^#{COMMAND_NAME_REGEXP} +#{COMMAND_ARG_REGEXP} +#{COMMAND_FLAG_REGEXP}$/ + + COMMAND_REGEXP = Regexp.union( + SIMPLE_COMMAND_REGEXP, + COMMAND_WITH_ARGS_REGEXP, + COMMAND_WITH_FLAGS_REGEXP, + COMMAND_WITH_ARGS_AND_FLAGS_REGEXP + ) + def build_statement(code) - code.force_encoding(@context.io.encoding) - command_or_alias, arg = code.split(/\s/, 2) - # Transform a non-identifier alias (@, $) or keywords (next, break) - command_name = @context.command_aliases[command_or_alias.to_sym] - command = command_name || command_or_alias - command_class = ExtendCommandBundle.load_command(command) + code.force_encoding(@context.io.encoding) unless code.frozen? + command_match = COMMAND_REGEXP.match(code.strip) + + if command_match + command_or_alias = command_match[:cmd_name] + arg = [command_match[:cmd_arg], command_match[:cmd_flag]].compact.join(' ') + # Transform a non-identifier alias (@, $) or keywords (next, break) + command_name = @context.command_aliases[command_or_alias.to_sym] + command = command_name || command_or_alias + command_class = ExtendCommandBundle.load_command(command) + end if command_class Statement::Command.new(code, command, arg, command_class) @@ -1119,7 +1139,9 @@ def build_statement(code) end def single_line_command?(code) - command = code.split(/\s/, 2).first + command_match = COMMAND_REGEXP.match(code) + return unless command_match + command = command_match[:cmd_name] @context.symbol_alias?(command) || @context.transform_args?(command) end diff --git a/lib/irb/cmd/ls.rb b/lib/irb/cmd/ls.rb index 791b1c1b2..648afddf1 100644 --- a/lib/irb/cmd/ls.rb +++ b/lib/irb/cmd/ls.rb @@ -15,7 +15,7 @@ class Ls < Nop description "Show methods, constants, and variables. `-g [query]` or `-G [query]` allows you to filter out the output." def self.transform_args(args) - if match = args&.match(/\A(?.+\s|)(-g|-G)\s+(?[^\s]+)\s*\n\z/) + if match = args&.match(/\A(?.+\s|)(-g|-G)\s+(?[^\s]+)\s*\z/) args = match[:args] "#{args}#{',' unless args.chomp.empty?} grep: /#{match[:grep]}/" else diff --git a/test/irb/test_debug_cmd.rb b/test/irb/test_debug_integration.rb similarity index 94% rename from test/irb/test_debug_cmd.rb rename to test/irb/test_debug_integration.rb index 0fb45af47..f7e770170 100644 --- a/test/irb/test_debug_cmd.rb +++ b/test/irb/test_debug_integration.rb @@ -6,7 +6,7 @@ require_relative "helper" module TestIRB - class DebugCommandTest < IntegrationTestCase + class DebugIntegrationTest < IntegrationTestCase def setup super @@ -434,5 +434,22 @@ def test_multi_irb_commands_are_not_available_after_activating_the_debugger assert_match(/irb\(main\):001> next/, output) assert_include(output, "Multi-IRB commands are not available when the debugger is enabled.") end + + def test_locals_assignment_not_being_treated_as_debugging_command + write_ruby <<~'ruby' + binding.irb + ruby + + output = run_ruby_file do + type "info = 4" + type "info + 1" + type "quit" + end + + assert_match(/=> 5/, output) + # Since neither `info = ` nor `info + ` are debugging commands, debugger should not be activated in this + # session. + assert_not_match(/irb:rdbg/, output) + end end end From 85c37c45c55c3eca93847cd692f58aa30dac4f93 Mon Sep 17 00:00:00 2001 From: Stan Lo Date: Thu, 7 Dec 2023 17:46:09 +0000 Subject: [PATCH 2/4] Add dedicated tests for input recognition logic --- test/irb/test_irb.rb | 58 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/test/irb/test_irb.rb b/test/irb/test_irb.rb index fb8b5c2bf..499889067 100644 --- a/test/irb/test_irb.rb +++ b/test/irb/test_irb.rb @@ -738,4 +738,62 @@ def build_irb IRB::Irb.new(workspace, TestInputMethod.new) end end + + class InputCategorisationTest < TestCase + def test_irb_distinguihes_commands_and_non_commands_correctly + irb = build_irb + + [ + "show_source Foo#bar", + "show_source Foo#bar -s", + "show_source Foo.bar", + "show_source Foo.bar -s", + "show_source == -s", + "show_source =~ -s", + "ls foo.bar -g baz", + "ls -g foo", + "bt 4", + "bt" + ].each do |input| + statement = irb.build_statement(input) + assert_equal(IRB::Statement::Command, statement.class, "Expected #{input} to be a command") + end + + [ + "info + 1", + "info - 1", + "info = 1", + "info = -1", + "info = /regex/", + "info = a", + "info += a", + "info -= a", + "info *= a", + "info &&= a", + "info ||= a", + "info # comment", + "info if foo", + "info ? foo : bar", + "info ; foo", + "info ,other = expr" + ].each do |input| + statement = irb.build_statement(input) + assert_equal(IRB::Statement::Expression, statement.class, "Expected #{input} to not be a command") + end + end + + private + + def build_binding + Object.new.instance_eval { binding } + end + + def build_irb + IRB.init_config(nil) + workspace = IRB::WorkSpace.new(build_binding) + + IRB.conf[:VERBOSE] = false + IRB::Irb.new(workspace, TestInputMethod.new) + end + end end From 218f22b2cb702a0148536d65ab62162f6ceb5833 Mon Sep 17 00:00:00 2001 From: Stan Lo Date: Tue, 12 Dec 2023 13:09:38 +0000 Subject: [PATCH 3/4] Raise a warning if the input causes an error and partially matches a command As we now proceed to stricter syntax checking for commands, incorrect command input would be processed as Ruby instead and likely causes errors. In that case, we should raise a warning to the user. For example, if an input is `show_source Foo bar`, the user would see a warning about the possible syntax error for using `show_source` command. But with this approach, we also need add a condition for the `measure` command as it's actually used as a method call with block. --- lib/irb.rb | 35 +++++++++++++++++++++------------- lib/irb/statement.rb | 8 ++++++-- test/irb/test_irb.rb | 45 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 73 insertions(+), 15 deletions(-) diff --git a/lib/irb.rb b/lib/irb.rb index 6ac0c49f5..9e11c874d 100644 --- a/lib/irb.rb +++ b/lib/irb.rb @@ -1036,6 +1036,9 @@ def eval_input rescue Interrupt, Exception => exc handle_exception(exc) @context.workspace.local_variable_set(:_, exc) + if statement.warning + warn statement.warning + end end end end @@ -1119,22 +1122,28 @@ def each_top_level_statement def build_statement(code) code.force_encoding(@context.io.encoding) unless code.frozen? - command_match = COMMAND_REGEXP.match(code.strip) - - if command_match - command_or_alias = command_match[:cmd_name] - arg = [command_match[:cmd_arg], command_match[:cmd_flag]].compact.join(' ') - # Transform a non-identifier alias (@, $) or keywords (next, break) - command_name = @context.command_aliases[command_or_alias.to_sym] - command = command_name || command_or_alias - command_class = ExtendCommandBundle.load_command(command) - end - if command_class - Statement::Command.new(code, command, arg, command_class) + possible_command_or_alias = code.split.first + + possible_command_name = @context.command_aliases[possible_command_or_alias.to_sym] + possible_command_name = possible_command_name || possible_command_or_alias + command_class = ExtendCommandBundle.load_command(possible_command_name) + + command_syntax_match = COMMAND_REGEXP.match(code.strip) + + if command_class && command_syntax_match + arg = [command_syntax_match[:cmd_arg], command_syntax_match[:cmd_flag]].compact.join(' ') + Statement::Command.new(code, possible_command_name, arg, command_class) else + if command_class + warning_msg = <<~MSG + The input `#{code.strip}` was recognised as a Ruby expression, but it matched the name of the `#{possible_command_name}` command. + If you intended to run it as a command, please check if the syntax is correct. + MSG + end + is_assignment_expression = @scanner.assignment_expression?(code, local_variables: @context.local_variables) - Statement::Expression.new(code, is_assignment_expression) + Statement::Expression.new(code, is_assignment_expression, warning: warning_msg) end end diff --git a/lib/irb/statement.rb b/lib/irb/statement.rb index b12110600..4a7f4a7dc 100644 --- a/lib/irb/statement.rb +++ b/lib/irb/statement.rb @@ -2,7 +2,7 @@ module IRB class Statement - attr_reader :code + attr_reader :code, :warning def is_assignment? raise NotImplementedError @@ -21,9 +21,10 @@ def evaluable_code end class Expression < Statement - def initialize(code, is_assignment) + def initialize(code, is_assignment, warning: nil) @code = code @is_assignment = is_assignment + @warning = warning end def suppresses_echo? @@ -66,6 +67,9 @@ def should_be_handled_by_debugger? end def evaluable_code + # Because measure command is used as a method, we need to treat it differently + return @code if @command == "measure" + # Hook command-specific transformation to return valid Ruby code if @command_class.respond_to?(:transform_args) arg = @command_class.transform_args(@arg) diff --git a/test/irb/test_irb.rb b/test/irb/test_irb.rb index 499889067..d0e73036b 100644 --- a/test/irb/test_irb.rb +++ b/test/irb/test_irb.rb @@ -58,6 +58,51 @@ def test_symbol_aliases_dont_affect_ruby_syntax assert_include output, "=> \"It's a foo\"" assert_include output, "=> \"It's a bar\"" end + + def test_errored_input_that_match_a_command_raises_a_syntax_check_reminder + write_ruby <<~'RUBY' + class Foo + end + binding.irb + RUBY + + errored_output = run_ruby_file do + type "show_source Foo bar" + type "exit!" + end + + # The input should cause an error as it's evaluated as Ruby code + assert_include errored_output, 'undefined local variable or method `bar\'' + + # Because it starts with the name of a command, it should also raise a warning + assert_include errored_output, + 'The input `show_source Foo bar` was recognised as a Ruby expression, but it matched the name of the `show_source` command.' + assert_include errored_output, + 'If you intended to run it as a command, please check if the syntax is correct.' + + output = run_ruby_file do + type "show_source Foo" + type "exit!" + end + + # The input should work as a command + assert_include output, "From: #{@ruby_file.path}:1" + # Therefore, it should not raise a warning + assert_not_include output, + 'If you intended to run it as a command, please check if the syntax is correct.' + + output = run_ruby_file do + type "show_source = 'foo'" + type "show_source + 'bar'" + type "exit!" + end + + # The input should work as Ruby code + assert_include(output, '=> "foobar"') + # Therefore, it should not raise a warning either + assert_not_include output, + 'If you intended to run it as a command, please check if the syntax is correct.' + end end class IrbIOConfigurationTest < TestCase From e02f06ec83a79628929ee3277f946d6726315b8f Mon Sep 17 00:00:00 2001 From: Stan Lo Date: Wed, 13 Dec 2023 15:31:01 +0100 Subject: [PATCH 4/4] Address feedback --- lib/irb.rb | 10 +++++----- test/irb/test_irb.rb | 3 ++- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/lib/irb.rb b/lib/irb.rb index 9e11c874d..677e63413 100644 --- a/lib/irb.rb +++ b/lib/irb.rb @@ -1106,12 +1106,12 @@ def each_top_level_statement end COMMAND_FLAG_REGEXP = /(?-[a-zA-Z]+( +\S+)*)/ - COMMAND_ARG_REGEXP = /(?[^-]\S*)/ + COMMAND_ARG_REGEXP = /(?[^- ]\S*)/ COMMAND_NAME_REGEXP = /(?\S+)/ - SIMPLE_COMMAND_REGEXP = /^#{COMMAND_NAME_REGEXP}$/ - COMMAND_WITH_ARGS_REGEXP = /^#{COMMAND_NAME_REGEXP} +#{COMMAND_ARG_REGEXP}$/ - COMMAND_WITH_FLAGS_REGEXP = /^#{COMMAND_NAME_REGEXP} +#{COMMAND_FLAG_REGEXP}$/ - COMMAND_WITH_ARGS_AND_FLAGS_REGEXP = /^#{COMMAND_NAME_REGEXP} +#{COMMAND_ARG_REGEXP} +#{COMMAND_FLAG_REGEXP}$/ + SIMPLE_COMMAND_REGEXP = /^#{COMMAND_NAME_REGEXP}\z/ + COMMAND_WITH_ARGS_REGEXP = /^#{COMMAND_NAME_REGEXP} +#{COMMAND_ARG_REGEXP}\z/ + COMMAND_WITH_FLAGS_REGEXP = /^#{COMMAND_NAME_REGEXP} +#{COMMAND_FLAG_REGEXP}\z/ + COMMAND_WITH_ARGS_AND_FLAGS_REGEXP = /^#{COMMAND_NAME_REGEXP} +#{COMMAND_ARG_REGEXP} +#{COMMAND_FLAG_REGEXP}\z/ COMMAND_REGEXP = Regexp.union( SIMPLE_COMMAND_REGEXP, diff --git a/test/irb/test_irb.rb b/test/irb/test_irb.rb index d0e73036b..d407a2a09 100644 --- a/test/irb/test_irb.rb +++ b/test/irb/test_irb.rb @@ -820,7 +820,8 @@ def test_irb_distinguihes_commands_and_non_commands_correctly "info if foo", "info ? foo : bar", "info ; foo", - "info ,other = expr" + "info ,other = expr", + "info -" # when an input is not a command nor Ruby code, we want to make it Ruby so it raises syntax error ].each do |input| statement = irb.build_statement(input) assert_equal(IRB::Statement::Expression, statement.class, "Expected #{input} to not be a command")