From 4eeb81569cd87056844c5517dc816b0e6a25ad50 Mon Sep 17 00:00:00 2001
From: ishaangandhi <ishaangandhi@gmail.com>
Date: Thu, 24 Sep 2020 23:57:50 -0400
Subject: [PATCH 1/3] Represent arithmetic expansions, i.e. $((exp)), in the CST

---
 src/CST.ml                                    |  1 +
 src/prelexer.mll                              | 16 +++--
 src/prelexerState.ml                          | 39 +++++++++++-
 .../quoting-equal-sign.sh.expected            |  9 ++-
 tests/good/2.3-token-recognition/arith_exp.sh |  1 +
 .../arith_exp.sh.expected                     | 60 +++++++++++++++++++
 .../redirection_input_duplication.sh.expected | 11 +++-
 ...redirection_output_duplication.sh.expected | 11 +++-
 8 files changed, 140 insertions(+), 8 deletions(-)
 create mode 100755 tests/good/2.3-token-recognition/arith_exp.sh
 create mode 100644 tests/good/2.3-token-recognition/arith_exp.sh.expected

diff --git a/src/CST.ml b/src/CST.ml
index 6befa775..681b841e 100644
--- a/src/CST.ml
+++ b/src/CST.ml
@@ -322,6 +322,7 @@ and word_cst = word_component list
 and word_component =
   | WordSubshell of subshell_kind * program located
   | WordName of string
+  | WordArith of word
   | WordAssignmentWord of assignment_word
   | WordDoubleQuoted of word
   | WordSingleQuoted of word
diff --git a/src/prelexer.mll b/src/prelexer.mll
index 04637f48..043e202b 100644
--- a/src/prelexer.mll
+++ b/src/prelexer.mll
@@ -381,7 +381,7 @@ rule token current = parse
    If the current character is an unquoted '$' or '`', the shell shall
    identify the start of any candidates for parameter expansion
    (Parameter Expansion), command substitution (Command Substitution),
-   or ] arithmetic expansion (Arithmetic Expansion) from their
+   or arithmetic expansion (Arithmetic Expansion) from their
    introductory unquoted character sequences: '$' or "${", "$(" or
    '`', and "$((", respectively. The shell shall read sufficient input
    to determine the end of the unit to be expanded (as explained in
@@ -432,10 +432,12 @@ rule token current = parse
   }
 
 | "$((" {
-    let current = push_string current "$((" in
+    debug ~rule:"arithmetic-exp" lexbuf current;
+    let current = push_arith current in
     let current = next_double_rparen 1 current lexbuf in
+    let current = pop_arith current in
     token current lexbuf
-  }
+}
 
 (*specification:
 
@@ -715,6 +717,13 @@ and next_double_rparen dplevel current = parse
     let current = push_string current "((" in
     next_double_rparen (dplevel+1) current lexbuf
   }
+  | "$((" {
+    debug ~rule:"arithmetic-exp" lexbuf current;
+    let current = push_arith current in
+    let current = next_double_rparen (dplevel+1) current lexbuf in
+    let current = pop_arith current in
+    current
+  }
   | '`' as op | "$" ( '(' as op) {
     let escaping_level = 0 in (* FIXME: Probably wrong. *)
     let current = push_string current (Lexing.lexeme lexbuf) in
@@ -724,7 +733,6 @@ and next_double_rparen dplevel current = parse
     next_double_rparen dplevel current lexbuf
   }
   | "))" {
-    let current = push_string current "))" in
     if dplevel = 1
     then current
     else if dplevel > 1 then next_double_rparen (dplevel-1) current lexbuf
diff --git a/src/prelexerState.ml b/src/prelexerState.ml
index ea494387..47a096ee 100644
--- a/src/prelexerState.ml
+++ b/src/prelexerState.ml
@@ -24,6 +24,7 @@ type atom =
   | WordComponent of (string * word_component)
   | QuotingMark of quote_kind
   | AssignmentMark
+  | ArithMark
 
 and quote_kind = SingleQuote | DoubleQuote | OpeningBrace
 
@@ -216,6 +217,7 @@ let string_of_atom = function
   | WordComponent (s, _) -> s
   | AssignmentMark -> "|=|"
   | QuotingMark _ -> "|Q|"
+  | ArithMark -> "|A|"
 
 let contents_of_atom_list atoms =
   String.concat "" (List.rev_map string_of_atom atoms)
@@ -238,6 +240,40 @@ let components_of_atom_list atoms =
 let components b =
   components_of_atom_list (buffer b)
 
+let push_arith b =
+  let cst = ArithMark in
+  let buffer = AtomBuffer.make (cst :: buffer b) in
+  { b with buffer }
+
+let pop_arith b =
+  let rec aux str_expression expression = function
+    | [] ->
+       (str_expression, expression, [])
+    | ArithMark :: buffer -> (str_expression, expression, buffer)
+    | (AssignmentMark | QuotingMark _ ) :: buffer ->
+       aux str_expression expression buffer (* FIXME: Check twice. *)
+    | WordComponent (w, WordEmpty) :: buffer ->
+       aux (w ^ str_expression) expression buffer
+    | WordComponent (w, c) :: buffer ->
+       aux (w ^ str_expression) (c :: expression) buffer
+  in
+  let str_expression, expression, buffer = aux "" [] (buffer b) in
+  let word = Word (str_expression, expression) in
+  let expression = WordArith word in
+    (* match k with
+    | SingleQuote -> WordSingleQuoted word
+    | DoubleQuote -> WordDoubleQuoted word
+    | OpeningBrace -> WordDoubleQuoted word *)
+  let str_expression =  "$((" ^ str_expression ^ "))"
+    (* match k with
+    | SingleQuote -> "'" ^ squote ^ "'"
+    | DoubleQuote -> "\"" ^ squote ^ "\""
+    | OpeningBrace -> squote *)
+  in
+  let expression = WordComponent (str_expression, expression) in
+  let buffer = AtomBuffer.make (expression :: buffer) in
+  { b with buffer }
+
 let push_quoting_mark k b =
   let cst = QuotingMark k in
   let buffer = AtomBuffer.make (cst :: buffer b) in
@@ -249,7 +285,7 @@ let pop_quotation k b =
        (squote, quote, [])
     | QuotingMark k' :: buffer when k = k' ->
        (squote, quote, buffer)
-    | (AssignmentMark | QuotingMark _) :: buffer ->
+    | (AssignmentMark | QuotingMark _ | ArithMark) :: buffer ->
        aux squote quote buffer (* FIXME: Check twice. *)
     | WordComponent (w, WordEmpty) :: buffer ->
        aux (w ^ squote) quote buffer
@@ -404,6 +440,7 @@ let return ?(with_newline=false) lexbuf (current : prelexer_state) tokens =
             | WordComponent (_, s) -> [s]
             | AssignmentMark -> []
             | QuotingMark _ -> []
+            | ArithMark -> []
          ) (buffer current)))
       in
       let csts = TildePrefix.recognize csts in
diff --git a/tests/good/2.2-quoting/quoting-equal-sign.sh.expected b/tests/good/2.2-quoting/quoting-equal-sign.sh.expected
index e233270d..c37ec3f5 100644
--- a/tests/good/2.2-quoting/quoting-equal-sign.sh.expected
+++ b/tests/good/2.2-quoting/quoting-equal-sign.sh.expected
@@ -268,7 +268,14 @@
                       [
                         "Word",
                         "a$((=))b",
-                        [ [ "WordLiteral", "a$((=))b" ] ]
+                        [
+                          [ "WordLiteral", "a" ],
+                          [
+                            "WordArith",
+                            [ "Word", "=", [ [ "WordLiteral", "=" ] ] ]
+                          ],
+                          [ "WordLiteral", "b" ]
+                        ]
                       ]
                     ]
                   ]
diff --git a/tests/good/2.3-token-recognition/arith_exp.sh b/tests/good/2.3-token-recognition/arith_exp.sh
new file mode 100755
index 00000000..e7df8b56
--- /dev/null
+++ b/tests/good/2.3-token-recognition/arith_exp.sh
@@ -0,0 +1 @@
+echo  $((1 * $((2*3)) ))
\ No newline at end of file
diff --git a/tests/good/2.3-token-recognition/arith_exp.sh.expected b/tests/good/2.3-token-recognition/arith_exp.sh.expected
new file mode 100644
index 00000000..86fffb6e
--- /dev/null
+++ b/tests/good/2.3-token-recognition/arith_exp.sh.expected
@@ -0,0 +1,60 @@
+[
+  "Program_LineBreak_CompleteCommands_LineBreak",
+  [ "LineBreak_Empty" ],
+  [
+    "CompleteCommands_CompleteCommand",
+    [
+      "CompleteCommand_CList",
+      [
+        "CList_AndOr",
+        [
+          "AndOr_Pipeline",
+          [
+            "Pipeline_PipeSequence",
+            [
+              "PipeSequence_Command",
+              [
+                "Command_SimpleCommand",
+                [
+                  "SimpleCommand_CmdName_CmdSuffix",
+                  [
+                    "CmdName_Word",
+                    [ "Word", "echo", [ [ "WordName", "echo" ] ] ]
+                  ],
+                  [
+                    "CmdSuffix_Word",
+                    [
+                      "Word",
+                      "$((1 * $((2*3 ))))",
+                      [
+                        [
+                          "WordArith",
+                          [
+                            "Word",
+                            "1 * $((2*3 ))",
+                            [
+                              [ "WordLiteral", "1 * " ],
+                              [
+                                "WordArith",
+                                [
+                                  "Word",
+                                  "2*3 ",
+                                  [ [ "WordLiteral", "2*3 " ] ]
+                                ]
+                              ]
+                            ]
+                          ]
+                        ]
+                      ]
+                    ]
+                  ]
+                ]
+              ]
+            ]
+          ]
+        ]
+      ]
+    ]
+  ],
+  [ "LineBreak_Empty" ]
+]
\ No newline at end of file
diff --git a/tests/good/2.7-redirection/2.7.5_duplicating-an-input-file-descriptor/redirection_input_duplication.sh.expected b/tests/good/2.7-redirection/2.7.5_duplicating-an-input-file-descriptor/redirection_input_duplication.sh.expected
index bd23db36..013ddc8e 100644
--- a/tests/good/2.7-redirection/2.7.5_duplicating-an-input-file-descriptor/redirection_input_duplication.sh.expected
+++ b/tests/good/2.7-redirection/2.7.5_duplicating-an-input-file-descriptor/redirection_input_duplication.sh.expected
@@ -484,7 +484,16 @@
                           [
                             "Word",
                             "$((3 + 1))",
-                            [ [ "WordLiteral", "$((3 + 1))" ] ]
+                            [
+                              [
+                                "WordArith",
+                                [
+                                  "Word",
+                                  "3 + 1",
+                                  [ [ "WordLiteral", "3 + 1" ] ]
+                                ]
+                              ]
+                            ]
                           ]
                         ]
                       ]
diff --git a/tests/good/2.7-redirection/2.7.6_duplicating-an-output-file-descriptor/redirection_output_duplication.sh.expected b/tests/good/2.7-redirection/2.7.6_duplicating-an-output-file-descriptor/redirection_output_duplication.sh.expected
index 8a4d8f18..af491040 100644
--- a/tests/good/2.7-redirection/2.7.6_duplicating-an-output-file-descriptor/redirection_output_duplication.sh.expected
+++ b/tests/good/2.7-redirection/2.7.6_duplicating-an-output-file-descriptor/redirection_output_duplication.sh.expected
@@ -484,7 +484,16 @@
                           [
                             "Word",
                             "$((3 + 1))",
-                            [ [ "WordLiteral", "$((3 + 1))" ] ]
+                            [
+                              [
+                                "WordArith",
+                                [
+                                  "Word",
+                                  "3 + 1",
+                                  [ [ "WordLiteral", "3 + 1" ] ]
+                                ]
+                              ]
+                            ]
                           ]
                         ]
                       ]

From 63b18eb285d8c7ad6928e2089fb9620210fd2f9d Mon Sep 17 00:00:00 2001
From: ishaangandhi <ishaangandhi@gmail.com>
Date: Fri, 25 Sep 2020 00:01:13 -0400
Subject: [PATCH 2/3] Remove commented-out code from pop_arith

---
 src/prelexerState.ml | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/src/prelexerState.ml b/src/prelexerState.ml
index 47a096ee..bfaa2600 100644
--- a/src/prelexerState.ml
+++ b/src/prelexerState.ml
@@ -260,15 +260,7 @@ let pop_arith b =
   let str_expression, expression, buffer = aux "" [] (buffer b) in
   let word = Word (str_expression, expression) in
   let expression = WordArith word in
-    (* match k with
-    | SingleQuote -> WordSingleQuoted word
-    | DoubleQuote -> WordDoubleQuoted word
-    | OpeningBrace -> WordDoubleQuoted word *)
   let str_expression =  "$((" ^ str_expression ^ "))"
-    (* match k with
-    | SingleQuote -> "'" ^ squote ^ "'"
-    | DoubleQuote -> "\"" ^ squote ^ "\""
-    | OpeningBrace -> squote *)
   in
   let expression = WordComponent (str_expression, expression) in
   let buffer = AtomBuffer.make (expression :: buffer) in

From f1741420704747d6ac2438db832a4841c7a6fef7 Mon Sep 17 00:00:00 2001
From: ishaangandhi <ishaangandhi@gmail.com>
Date: Wed, 21 Oct 2020 00:37:03 -0400
Subject: [PATCH 3/3] Fix subshell separation bug

---
 src/prelexer.mll                              |   5 +-
 tests/good/2.3-token-recognition/arith_exp.sh |   4 +-
 .../arith_exp.sh.expected                     | 122 ++++++++++++++++--
 3 files changed, 119 insertions(+), 12 deletions(-)

diff --git a/src/prelexer.mll b/src/prelexer.mll
index 043e202b..d8e970d7 100644
--- a/src/prelexer.mll
+++ b/src/prelexer.mll
@@ -435,7 +435,6 @@ rule token current = parse
     debug ~rule:"arithmetic-exp" lexbuf current;
     let current = push_arith current in
     let current = next_double_rparen 1 current lexbuf in
-    let current = pop_arith current in
     token current lexbuf
 }
 
@@ -721,18 +720,18 @@ and next_double_rparen dplevel current = parse
     debug ~rule:"arithmetic-exp" lexbuf current;
     let current = push_arith current in
     let current = next_double_rparen (dplevel+1) current lexbuf in
-    let current = pop_arith current in
     current
   }
   | '`' as op | "$" ( '(' as op) {
     let escaping_level = 0 in (* FIXME: Probably wrong. *)
-    let current = push_string current (Lexing.lexeme lexbuf) in
+    let current = push_separated_string current (Lexing.lexeme lexbuf) in
     let current = subshell op escaping_level current lexbuf in
     let expected_closing_char = if op = '`' then '`' else ')' in
     let current = close_subshell expected_closing_char current lexbuf in
     next_double_rparen dplevel current lexbuf
   }
   | "))" {
+    let current = pop_arith current in
     if dplevel = 1
     then current
     else if dplevel > 1 then next_double_rparen (dplevel-1) current lexbuf
diff --git a/tests/good/2.3-token-recognition/arith_exp.sh b/tests/good/2.3-token-recognition/arith_exp.sh
index e7df8b56..d98fc8e4 100755
--- a/tests/good/2.3-token-recognition/arith_exp.sh
+++ b/tests/good/2.3-token-recognition/arith_exp.sh
@@ -1 +1,3 @@
-echo  $((1 * $((2*3)) ))
\ No newline at end of file
+echo  $((1 * $((2*3)) * 42))
+
+echo $((x+=`echo 2`))
\ No newline at end of file
diff --git a/tests/good/2.3-token-recognition/arith_exp.sh.expected b/tests/good/2.3-token-recognition/arith_exp.sh.expected
index 86fffb6e..7d9f3e6e 100644
--- a/tests/good/2.3-token-recognition/arith_exp.sh.expected
+++ b/tests/good/2.3-token-recognition/arith_exp.sh.expected
@@ -2,7 +2,64 @@
   "Program_LineBreak_CompleteCommands_LineBreak",
   [ "LineBreak_Empty" ],
   [
-    "CompleteCommands_CompleteCommand",
+    "CompleteCommands_CompleteCommands_NewlineList_CompleteCommand",
+    [
+      "CompleteCommands_CompleteCommand",
+      [
+        "CompleteCommand_CList",
+        [
+          "CList_AndOr",
+          [
+            "AndOr_Pipeline",
+            [
+              "Pipeline_PipeSequence",
+              [
+                "PipeSequence_Command",
+                [
+                  "Command_SimpleCommand",
+                  [
+                    "SimpleCommand_CmdName_CmdSuffix",
+                    [
+                      "CmdName_Word",
+                      [ "Word", "echo", [ [ "WordName", "echo" ] ] ]
+                    ],
+                    [
+                      "CmdSuffix_Word",
+                      [
+                        "Word",
+                        "$((1 * $((2*3)) * 42))",
+                        [
+                          [
+                            "WordArith",
+                            [
+                              "Word",
+                              "1 * $((2*3)) * 42",
+                              [
+                                [ "WordLiteral", "1 * " ],
+                                [
+                                  "WordArith",
+                                  [
+                                    "Word",
+                                    "2*3",
+                                    [ [ "WordLiteral", "2*3" ] ]
+                                  ]
+                                ],
+                                [ "WordLiteral", " * 42" ]
+                              ]
+                            ]
+                          ]
+                        ]
+                      ]
+                    ]
+                  ]
+                ]
+              ]
+            ]
+          ]
+        ]
+      ]
+    ],
+    [ "NewLineList_NewLine" ],
     [
       "CompleteCommand_CList",
       [
@@ -25,21 +82,70 @@
                     "CmdSuffix_Word",
                     [
                       "Word",
-                      "$((1 * $((2*3 ))))",
+                      "$((x+=`echo 2`))",
                       [
                         [
                           "WordArith",
                           [
                             "Word",
-                            "1 * $((2*3 ))",
+                            "x+=`echo 2`",
                             [
-                              [ "WordLiteral", "1 * " ],
+                              [ "WordLiteral", "x+=" ],
                               [
-                                "WordArith",
+                                "WordSubshell",
+                                [ "SubShellKindBackQuote" ],
                                 [
-                                  "Word",
-                                  "2*3 ",
-                                  [ [ "WordLiteral", "2*3 " ] ]
+                                  "Program_LineBreak_CompleteCommands_LineBreak",
+                                  [ "LineBreak_Empty" ],
+                                  [
+                                    "CompleteCommands_CompleteCommand",
+                                    [
+                                      "CompleteCommand_CList",
+                                      [
+                                        "CList_AndOr",
+                                        [
+                                          "AndOr_Pipeline",
+                                          [
+                                            "Pipeline_PipeSequence",
+                                            [
+                                              "PipeSequence_Command",
+                                              [
+                                                "Command_SimpleCommand",
+                                                [
+                                                  "SimpleCommand_CmdName_CmdSuffix",
+                                                  [
+                                                    "CmdName_Word",
+                                                    [
+                                                      "Word",
+                                                      "echo",
+                                                      [
+                                                        [
+                                                          "WordName", "echo"
+                                                        ]
+                                                      ]
+                                                    ]
+                                                  ],
+                                                  [
+                                                    "CmdSuffix_Word",
+                                                    [
+                                                      "Word",
+                                                      "2",
+                                                      [
+                                                        [
+                                                          "WordLiteral", "2"
+                                                        ]
+                                                      ]
+                                                    ]
+                                                  ]
+                                                ]
+                                              ]
+                                            ]
+                                          ]
+                                        ]
+                                      ]
+                                    ]
+                                  ],
+                                  [ "LineBreak_Empty" ]
                                 ]
                               ]
                             ]