From 4eeb81569cd87056844c5517dc816b0e6a25ad50 Mon Sep 17 00:00:00 2001 From: ishaangandhi Date: Thu, 24 Sep 2020 23:57:50 -0400 Subject: [PATCH 1/3] Arithmetic expressions i.e. $((exp)) in CST --- src/CST.ml | 1 + src/prelexer.mll | 16 +++-- src/prelexerState.ml | 39 +++++++++++- .../quoting-equal-sign.sh.expected | 9 ++- tests/good/2.3-token-recognition/arith_exp.sh | 1 + .../arith_exp.sh.expected | 60 +++++++++++++++++++ .../redirection_input_duplication.sh.expected | 11 +++- ...redirection_output_duplication.sh.expected | 11 +++- 8 files changed, 140 insertions(+), 8 deletions(-) create mode 100755 tests/good/2.3-token-recognition/arith_exp.sh create mode 100644 tests/good/2.3-token-recognition/arith_exp.sh.expected diff --git a/src/CST.ml b/src/CST.ml index 6befa775..681b841e 100644 --- a/src/CST.ml +++ b/src/CST.ml @@ -322,6 +322,7 @@ and word_cst = word_component list and word_component = | WordSubshell of subshell_kind * program located | WordName of string + | WordArith of word | WordAssignmentWord of assignment_word | WordDoubleQuoted of word | WordSingleQuoted of word diff --git a/src/prelexer.mll b/src/prelexer.mll index 04637f48..043e202b 100644 --- a/src/prelexer.mll +++ b/src/prelexer.mll @@ -381,7 +381,7 @@ rule token current = parse If the current character is an unquoted '$' or '`', the shell shall identify the start of any candidates for parameter expansion (Parameter Expansion), command substitution (Command Substitution), - or ] arithmetic expansion (Arithmetic Expansion) from their + or arithmetic expansion (Arithmetic Expansion) from their introductory unquoted character sequences: '$' or "${", "$(" or '`', and "$((", respectively. The shell shall read sufficient input to determine the end of the unit to be expanded (as explained in @@ -432,10 +432,12 @@ rule token current = parse } | "$((" { - let current = push_string current "$((" in + debug ~rule:"arithmetic-exp" lexbuf current; + let current = push_arith current in let current = next_double_rparen 1 current lexbuf in + let current = pop_arith current in token current lexbuf - } +} (*specification: @@ -715,6 +717,13 @@ and next_double_rparen dplevel current = parse let current = push_string current "((" in next_double_rparen (dplevel+1) current lexbuf } + | "$((" { + debug ~rule:"arithmetic-exp" lexbuf current; + let current = push_arith current in + let current = next_double_rparen (dplevel+1) current lexbuf in + let current = pop_arith current in + current + } | '`' as op | "$" ( '(' as op) { let escaping_level = 0 in (* FIXME: Probably wrong. *) let current = push_string current (Lexing.lexeme lexbuf) in @@ -724,7 +733,6 @@ and next_double_rparen dplevel current = parse next_double_rparen dplevel current lexbuf } | "))" { - let current = push_string current "))" in if dplevel = 1 then current else if dplevel > 1 then next_double_rparen (dplevel-1) current lexbuf diff --git a/src/prelexerState.ml b/src/prelexerState.ml index ea494387..47a096ee 100644 --- a/src/prelexerState.ml +++ b/src/prelexerState.ml @@ -24,6 +24,7 @@ type atom = | WordComponent of (string * word_component) | QuotingMark of quote_kind | AssignmentMark + | ArithMark and quote_kind = SingleQuote | DoubleQuote | OpeningBrace @@ -216,6 +217,7 @@ let string_of_atom = function | WordComponent (s, _) -> s | AssignmentMark -> "|=|" | QuotingMark _ -> "|Q|" + | ArithMark -> "|A|" let contents_of_atom_list atoms = String.concat "" (List.rev_map string_of_atom atoms) @@ -238,6 +240,40 @@ let components_of_atom_list atoms = let components b = components_of_atom_list (buffer b) +let push_arith b = + let cst = ArithMark in + let buffer = AtomBuffer.make (cst :: buffer b) in + { b with buffer } + +let pop_arith b = + let rec aux str_expression expression = function + | [] -> + (str_expression, expression, []) + | ArithMark :: buffer -> (str_expression, expression, buffer) + | (AssignmentMark | QuotingMark _ ) :: buffer -> + aux str_expression expression buffer (* FIXME: Check twice. *) + | WordComponent (w, WordEmpty) :: buffer -> + aux (w ^ str_expression) expression buffer + | WordComponent (w, c) :: buffer -> + aux (w ^ str_expression) (c :: expression) buffer + in + let str_expression, expression, buffer = aux "" [] (buffer b) in + let word = Word (str_expression, expression) in + let expression = WordArith word in + (* match k with + | SingleQuote -> WordSingleQuoted word + | DoubleQuote -> WordDoubleQuoted word + | OpeningBrace -> WordDoubleQuoted word *) + let str_expression = "$((" ^ str_expression ^ "))" + (* match k with + | SingleQuote -> "'" ^ squote ^ "'" + | DoubleQuote -> "\"" ^ squote ^ "\"" + | OpeningBrace -> squote *) + in + let expression = WordComponent (str_expression, expression) in + let buffer = AtomBuffer.make (expression :: buffer) in + { b with buffer } + let push_quoting_mark k b = let cst = QuotingMark k in let buffer = AtomBuffer.make (cst :: buffer b) in @@ -249,7 +285,7 @@ let pop_quotation k b = (squote, quote, []) | QuotingMark k' :: buffer when k = k' -> (squote, quote, buffer) - | (AssignmentMark | QuotingMark _) :: buffer -> + | (AssignmentMark | QuotingMark _ | ArithMark) :: buffer -> aux squote quote buffer (* FIXME: Check twice. *) | WordComponent (w, WordEmpty) :: buffer -> aux (w ^ squote) quote buffer @@ -404,6 +440,7 @@ let return ?(with_newline=false) lexbuf (current : prelexer_state) tokens = | WordComponent (_, s) -> [s] | AssignmentMark -> [] | QuotingMark _ -> [] + | ArithMark -> [] ) (buffer current))) in let csts = TildePrefix.recognize csts in diff --git a/tests/good/2.2-quoting/quoting-equal-sign.sh.expected b/tests/good/2.2-quoting/quoting-equal-sign.sh.expected index e233270d..c37ec3f5 100644 --- a/tests/good/2.2-quoting/quoting-equal-sign.sh.expected +++ b/tests/good/2.2-quoting/quoting-equal-sign.sh.expected @@ -268,7 +268,14 @@ [ "Word", "a$((=))b", - [ [ "WordLiteral", "a$((=))b" ] ] + [ + [ "WordLiteral", "a" ], + [ + "WordArith", + [ "Word", "=", [ [ "WordLiteral", "=" ] ] ] + ], + [ "WordLiteral", "b" ] + ] ] ] ] diff --git a/tests/good/2.3-token-recognition/arith_exp.sh b/tests/good/2.3-token-recognition/arith_exp.sh new file mode 100755 index 00000000..e7df8b56 --- /dev/null +++ b/tests/good/2.3-token-recognition/arith_exp.sh @@ -0,0 +1 @@ +echo $((1 * $((2*3)) )) \ No newline at end of file diff --git a/tests/good/2.3-token-recognition/arith_exp.sh.expected b/tests/good/2.3-token-recognition/arith_exp.sh.expected new file mode 100644 index 00000000..86fffb6e --- /dev/null +++ b/tests/good/2.3-token-recognition/arith_exp.sh.expected @@ -0,0 +1,60 @@ +[ + "Program_LineBreak_CompleteCommands_LineBreak", + [ "LineBreak_Empty" ], + [ + "CompleteCommands_CompleteCommand", + [ + "CompleteCommand_CList", + [ + "CList_AndOr", + [ + "AndOr_Pipeline", + [ + "Pipeline_PipeSequence", + [ + "PipeSequence_Command", + [ + "Command_SimpleCommand", + [ + "SimpleCommand_CmdName_CmdSuffix", + [ + "CmdName_Word", + [ "Word", "echo", [ [ "WordName", "echo" ] ] ] + ], + [ + "CmdSuffix_Word", + [ + "Word", + "$((1 * $((2*3 ))))", + [ + [ + "WordArith", + [ + "Word", + "1 * $((2*3 ))", + [ + [ "WordLiteral", "1 * " ], + [ + "WordArith", + [ + "Word", + "2*3 ", + [ [ "WordLiteral", "2*3 " ] ] + ] + ] + ] + ] + ] + ] + ] + ] + ] + ] + ] + ] + ] + ] + ] + ], + [ "LineBreak_Empty" ] +] \ No newline at end of file diff --git a/tests/good/2.7-redirection/2.7.5_duplicating-an-input-file-descriptor/redirection_input_duplication.sh.expected b/tests/good/2.7-redirection/2.7.5_duplicating-an-input-file-descriptor/redirection_input_duplication.sh.expected index bd23db36..013ddc8e 100644 --- a/tests/good/2.7-redirection/2.7.5_duplicating-an-input-file-descriptor/redirection_input_duplication.sh.expected +++ b/tests/good/2.7-redirection/2.7.5_duplicating-an-input-file-descriptor/redirection_input_duplication.sh.expected @@ -484,7 +484,16 @@ [ "Word", "$((3 + 1))", - [ [ "WordLiteral", "$((3 + 1))" ] ] + [ + [ + "WordArith", + [ + "Word", + "3 + 1", + [ [ "WordLiteral", "3 + 1" ] ] + ] + ] + ] ] ] ] diff --git a/tests/good/2.7-redirection/2.7.6_duplicating-an-output-file-descriptor/redirection_output_duplication.sh.expected b/tests/good/2.7-redirection/2.7.6_duplicating-an-output-file-descriptor/redirection_output_duplication.sh.expected index 8a4d8f18..af491040 100644 --- a/tests/good/2.7-redirection/2.7.6_duplicating-an-output-file-descriptor/redirection_output_duplication.sh.expected +++ b/tests/good/2.7-redirection/2.7.6_duplicating-an-output-file-descriptor/redirection_output_duplication.sh.expected @@ -484,7 +484,16 @@ [ "Word", "$((3 + 1))", - [ [ "WordLiteral", "$((3 + 1))" ] ] + [ + [ + "WordArith", + [ + "Word", + "3 + 1", + [ [ "WordLiteral", "3 + 1" ] ] + ] + ] + ] ] ] ] From 63b18eb285d8c7ad6928e2089fb9620210fd2f9d Mon Sep 17 00:00:00 2001 From: ishaangandhi Date: Fri, 25 Sep 2020 00:01:13 -0400 Subject: [PATCH 2/3] removed comment --- src/prelexerState.ml | 8 -------- 1 file changed, 8 deletions(-) diff --git a/src/prelexerState.ml b/src/prelexerState.ml index 47a096ee..bfaa2600 100644 --- a/src/prelexerState.ml +++ b/src/prelexerState.ml @@ -260,15 +260,7 @@ let pop_arith b = let str_expression, expression, buffer = aux "" [] (buffer b) in let word = Word (str_expression, expression) in let expression = WordArith word in - (* match k with - | SingleQuote -> WordSingleQuoted word - | DoubleQuote -> WordDoubleQuoted word - | OpeningBrace -> WordDoubleQuoted word *) let str_expression = "$((" ^ str_expression ^ "))" - (* match k with - | SingleQuote -> "'" ^ squote ^ "'" - | DoubleQuote -> "\"" ^ squote ^ "\"" - | OpeningBrace -> squote *) in let expression = WordComponent (str_expression, expression) in let buffer = AtomBuffer.make (expression :: buffer) in From f1741420704747d6ac2438db832a4841c7a6fef7 Mon Sep 17 00:00:00 2001 From: ishaangandhi Date: Wed, 21 Oct 2020 00:37:03 -0400 Subject: [PATCH 3/3] Fix subshell separation bug --- src/prelexer.mll | 5 +- tests/good/2.3-token-recognition/arith_exp.sh | 4 +- .../arith_exp.sh.expected | 122 ++++++++++++++++-- 3 files changed, 119 insertions(+), 12 deletions(-) diff --git a/src/prelexer.mll b/src/prelexer.mll index 043e202b..d8e970d7 100644 --- a/src/prelexer.mll +++ b/src/prelexer.mll @@ -435,7 +435,6 @@ rule token current = parse debug ~rule:"arithmetic-exp" lexbuf current; let current = push_arith current in let current = next_double_rparen 1 current lexbuf in - let current = pop_arith current in token current lexbuf } @@ -721,18 +720,18 @@ and next_double_rparen dplevel current = parse debug ~rule:"arithmetic-exp" lexbuf current; let current = push_arith current in let current = next_double_rparen (dplevel+1) current lexbuf in - let current = pop_arith current in current } | '`' as op | "$" ( '(' as op) { let escaping_level = 0 in (* FIXME: Probably wrong. *) - let current = push_string current (Lexing.lexeme lexbuf) in + let current = push_separated_string current (Lexing.lexeme lexbuf) in let current = subshell op escaping_level current lexbuf in let expected_closing_char = if op = '`' then '`' else ')' in let current = close_subshell expected_closing_char current lexbuf in next_double_rparen dplevel current lexbuf } | "))" { + let current = pop_arith current in if dplevel = 1 then current else if dplevel > 1 then next_double_rparen (dplevel-1) current lexbuf diff --git a/tests/good/2.3-token-recognition/arith_exp.sh b/tests/good/2.3-token-recognition/arith_exp.sh index e7df8b56..d98fc8e4 100755 --- a/tests/good/2.3-token-recognition/arith_exp.sh +++ b/tests/good/2.3-token-recognition/arith_exp.sh @@ -1 +1,3 @@ -echo $((1 * $((2*3)) )) \ No newline at end of file +echo $((1 * $((2*3)) * 42)) + +echo $((x+=`echo 2`)) \ No newline at end of file diff --git a/tests/good/2.3-token-recognition/arith_exp.sh.expected b/tests/good/2.3-token-recognition/arith_exp.sh.expected index 86fffb6e..7d9f3e6e 100644 --- a/tests/good/2.3-token-recognition/arith_exp.sh.expected +++ b/tests/good/2.3-token-recognition/arith_exp.sh.expected @@ -2,7 +2,64 @@ "Program_LineBreak_CompleteCommands_LineBreak", [ "LineBreak_Empty" ], [ - "CompleteCommands_CompleteCommand", + "CompleteCommands_CompleteCommands_NewlineList_CompleteCommand", + [ + "CompleteCommands_CompleteCommand", + [ + "CompleteCommand_CList", + [ + "CList_AndOr", + [ + "AndOr_Pipeline", + [ + "Pipeline_PipeSequence", + [ + "PipeSequence_Command", + [ + "Command_SimpleCommand", + [ + "SimpleCommand_CmdName_CmdSuffix", + [ + "CmdName_Word", + [ "Word", "echo", [ [ "WordName", "echo" ] ] ] + ], + [ + "CmdSuffix_Word", + [ + "Word", + "$((1 * $((2*3)) * 42))", + [ + [ + "WordArith", + [ + "Word", + "1 * $((2*3)) * 42", + [ + [ "WordLiteral", "1 * " ], + [ + "WordArith", + [ + "Word", + "2*3", + [ [ "WordLiteral", "2*3" ] ] + ] + ], + [ "WordLiteral", " * 42" ] + ] + ] + ] + ] + ] + ] + ] + ] + ] + ] + ] + ] + ] + ], + [ "NewLineList_NewLine" ], [ "CompleteCommand_CList", [ @@ -25,21 +82,70 @@ "CmdSuffix_Word", [ "Word", - "$((1 * $((2*3 ))))", + "$((x+=`echo 2`))", [ [ "WordArith", [ "Word", - "1 * $((2*3 ))", + "x+=`echo 2`", [ - [ "WordLiteral", "1 * " ], + [ "WordLiteral", "x+=" ], [ - "WordArith", + "WordSubshell", + [ "SubShellKindBackQuote" ], [ - "Word", - "2*3 ", - [ [ "WordLiteral", "2*3 " ] ] + "Program_LineBreak_CompleteCommands_LineBreak", + [ "LineBreak_Empty" ], + [ + "CompleteCommands_CompleteCommand", + [ + "CompleteCommand_CList", + [ + "CList_AndOr", + [ + "AndOr_Pipeline", + [ + "Pipeline_PipeSequence", + [ + "PipeSequence_Command", + [ + "Command_SimpleCommand", + [ + "SimpleCommand_CmdName_CmdSuffix", + [ + "CmdName_Word", + [ + "Word", + "echo", + [ + [ + "WordName", "echo" + ] + ] + ] + ], + [ + "CmdSuffix_Word", + [ + "Word", + "2", + [ + [ + "WordLiteral", "2" + ] + ] + ] + ] + ] + ] + ] + ] + ] + ] + ] + ], + [ "LineBreak_Empty" ] ] ] ]