From 07604e3df8b8396696b4687b81e212611d9cee8d Mon Sep 17 00:00:00 2001
From: Aakash Patel <avp@meta.com>
Date: Tue, 3 Oct 2023 12:45:36 -0700
Subject: [PATCH] Convert 'as' to a new token

Summary:
Fixes the parsing of multiple TypeScript as expressions in a row.
Previously `x as any as number` was parsed as `x as (any as number)`,
instead of `(x as any) as number`.

This was because we customized the precedence calculation for `as`
in the function getPrecedenceExcept,
but this custom precedence was not saved in the stack.
Later when we had an as expression in the stack,
we were not getting the right precedence level for it when doing
`getPrecedence(stack.back().opKind)`.

Fix this by checking for `identifier` specially and converting
the `as` identifier to a new token kind, `as_operator`, before pushing
onto the stack.
This can be extended to implement `satisfies` as well.

Reviewed By: tmikov

Differential Revision: D49700751

fbshipit-source-id: 68d4fd7552cb90c3196d546c53989a5f29ba7add
---
 include/hermes/Parser/JSLexer.h               | 15 ++++++++
 include/hermes/Parser/TokenKinds.def          |  7 ++++
 lib/Parser/JSParserImpl.cpp                   | 37 +++++++++----------
 lib/Parser/JSParserImpl.h                     |  5 +++
 test/Parser/ts/as.ts                          | 24 +++++++++++-
 .../HermesParserJSSerializer.cpp              |  5 +++
 6 files changed, 72 insertions(+), 21 deletions(-)

diff --git a/include/hermes/Parser/JSLexer.h b/include/hermes/Parser/JSLexer.h
index f93dee43f31..62f418882ba 100644
--- a/include/hermes/Parser/JSLexer.h
+++ b/include/hermes/Parser/JSLexer.h
@@ -226,6 +226,11 @@ class Token {
   void setPunctuator(TokenKind kind) {
     kind_ = kind;
   }
+  /// Set the TokenKind to a given IDENT_OP token.
+  /// Used when converting identifiers to the corresponding ident op token.
+  void setIdentOp(TokenKind kind) {
+    kind_ = kind;
+  }
   void setEof() {
     kind_ = TokenKind::eof;
   }
@@ -722,6 +727,16 @@ class JSLexer {
     }
   };
 
+  /// Convert the current token to an identifier-operator token.
+  /// Identifiers for new TokenKinds can be added here.
+  /// \pre the current token is an identifier which is an IDENT_OP operator.
+  /// \param the IDENT_OP token kind to convert to.
+  void convertCurTokenToIdentOp(TokenKind kind) {
+    assert(token_.getKind() == TokenKind::identifier);
+    assert(token_.getIdentifier()->str() == tokenKindStr(kind));
+    token_.setIdentOp(kind);
+  }
+
  private:
   /// Initialize the storage with the characters between \p begin and \p end.
   inline void initStorageWith(const char *begin, const char *end);
diff --git a/include/hermes/Parser/TokenKinds.def b/include/hermes/Parser/TokenKinds.def
index c029f291ca1..50abab45f7d 100644
--- a/include/hermes/Parser/TokenKinds.def
+++ b/include/hermes/Parser/TokenKinds.def
@@ -29,6 +29,10 @@
 #define TEMPLATE(name, str) TOK(name, str)
 #endif
 
+#ifndef IDENT_OP
+#define IDENT_OP(name, str, precedence) TOK(name, str)
+#endif
+
 #ifndef RANGE_MARKER
 #define RANGE_MARKER(name) TOK(name, "<" #name ">")
 #endif
@@ -155,6 +159,8 @@ PUNCTUATOR(questionquestionequal, "\?\?=")
 PUNCTUATOR(caretequal,          "^=")
 PUNCTUATOR(equalgreater,        "=>")
 
+IDENT_OP(as_operator,           "as",   8)
+
 TOK(numeric_literal, "number")
 TOK(string_literal, "string")
 TOK(regexp_literal, "regexp")
@@ -175,4 +181,5 @@ RANGE_MARKER(_last_token)
 #undef PUNCTUATOR_FLOW
 #undef BINOP
 #undef TEMPLATE
+#undef IDENT_OP
 #undef RANGE_MARKER
diff --git a/lib/Parser/JSParserImpl.cpp b/lib/Parser/JSParserImpl.cpp
index 35574e7ad16..4f5ec4102b7 100644
--- a/lib/Parser/JSParserImpl.cpp
+++ b/lib/Parser/JSParserImpl.cpp
@@ -3978,6 +3978,7 @@ inline unsigned getPrecedence(TokenKind kind) {
   static const unsigned precedence[] = {
 #define TOK(...) 0,
 #define BINOP(name, str, precedence) precedence,
+#define IDENT_OP(name, str, precedence) precedence,
 
 // There are two reserved words that are binary operators.
 #define RESWORD(name)                                       \
@@ -4000,22 +4001,22 @@ inline bool isLeftAssoc(TokenKind kind) {
 /// except, in which case return 0.
 /// \param asIdent if not null, the "as" UniqueString used to parse TS
 ///   AsExpressions.
-inline unsigned getPrecedenceExcept(
-    const Token *token,
-    TokenKind except,
-    UniqueString *asIdent) {
+inline unsigned getPrecedenceExcept(const Token *token, TokenKind except) {
   const TokenKind kind = token->getKind();
-#if HERMES_PARSE_TS
-  // 'as' has the same precedence as 'in' in TS.
-  if (LLVM_UNLIKELY(kind == TokenKind::identifier) &&
-      LLVM_UNLIKELY(token->getIdentifier() == asIdent)) {
-    return getPrecedence(TokenKind::rw_in);
-  }
-#endif
   return LLVM_LIKELY(kind != except) ? getPrecedence(kind) : 0;
 }
 } // namespace
 
+inline void JSParserImpl::convertIdentOpIfPossible() {
+#if HERMES_PARSE_TS
+  if (LLVM_UNLIKELY(tok_->getKind() == TokenKind::identifier) &&
+      context_.getParseTS()) {
+    if (tok_->getIdentifier() == asIdent_)
+      lexer_.convertCurTokenToIdentOp(TokenKind::as_operator);
+  }
+#endif
+};
+
 Optional<ESTree::Node *> JSParserImpl::parseBinaryExpression(Param param) {
   // The stack can never go deeper than the number of precedence levels,
   // unless we have a right-associative operator.
@@ -4074,9 +4075,7 @@ Optional<ESTree::Node *> JSParserImpl::parseBinaryExpression(Param param) {
           endLoc,
           new (context_) ESTree::LogicalExpressionNode(left, right, opIdent));
 #if HERMES_PARSE_TS
-    } else if (LLVM_UNLIKELY(opKind == TokenKind::identifier)) {
-      // The only identifier used as a binary operator is 'as' in TS
-      // and it would only have been pushed if TS parsing was enabled.
+    } else if (LLVM_UNLIKELY(opKind == TokenKind::as_operator)) {
       return setLocation(
           startLoc,
           endLoc,
@@ -4129,12 +4128,10 @@ Optional<ESTree::Node *> JSParserImpl::parseBinaryExpression(Param param) {
     topExpr = optExpr.getValue();
   }
   SMLoc topExprEndLoc = getPrevTokenEndLoc();
+  convertIdentOpIfPossible();
 
   // While the current token is a binary operator.
-  while (unsigned precedence = getPrecedenceExcept(
-             tok_,
-             exceptKind,
-             HERMES_PARSE_TS && context_.getParseTS() ? asIdent_ : nullptr)) {
+  while (unsigned precedence = getPrecedenceExcept(tok_, exceptKind)) {
     // If the next operator has no greater precedence than the operator on the
     // stack, pop the stack, creating a new binary expression.
     while (!stack.empty() && precedence <= getPrecedence(stack.back().opKind)) {
@@ -4166,8 +4163,7 @@ Optional<ESTree::Node *> JSParserImpl::parseBinaryExpression(Param param) {
 
     topExprStartLoc = tok_->getStartLoc();
 #if HERMES_PARSE_TS
-    if (context_.getParseTS() &&
-        LLVM_UNLIKELY(stack.back().opKind == TokenKind::identifier)) {
+    if (LLVM_UNLIKELY(stack.back().opKind == TokenKind::as_operator)) {
       auto optRightExpr = parseTypeAnnotationTS();
       if (!optRightExpr)
         return None;
@@ -4186,6 +4182,7 @@ Optional<ESTree::Node *> JSParserImpl::parseBinaryExpression(Param param) {
     }
 
     topExprEndLoc = getPrevTokenEndLoc();
+    convertIdentOpIfPossible();
   }
 
   // We have consumed all binary operators. Pop the stack, creating expressions.
diff --git a/lib/Parser/JSParserImpl.h b/lib/Parser/JSParserImpl.h
index 0d21f9fc5d8..8c85bc48387 100644
--- a/lib/Parser/JSParserImpl.h
+++ b/lib/Parser/JSParserImpl.h
@@ -900,6 +900,11 @@ class JSParserImpl {
   Optional<ESTree::Node *> parsePostfixExpression();
   Optional<ESTree::Node *> parseUnaryExpression();
 
+  /// Convert identifiers to the operator they represent.
+  /// Called after each parseUnaryExpression to change identifiers that might be
+  /// operators into their corresponding IDENT_OP tokens.
+  inline void convertIdentOpIfPossible();
+
   /// Parse a binary expression using a precedence table, in order to decrease
   /// recursion depth.
   Optional<ESTree::Node *> parseBinaryExpression(Param param);
diff --git a/test/Parser/ts/as.ts b/test/Parser/ts/as.ts
index 6d1a3d02d35..4008d835749 100644
--- a/test/Parser/ts/as.ts
+++ b/test/Parser/ts/as.ts
@@ -71,7 +71,29 @@ x + y as number;
 // CHECK-NEXT:         }
 // CHECK-NEXT:       },
 // CHECK-NEXT:       "directive": null
-// CHECK-NEXT:     }
+// CHECK-NEXT:     },
 
+x as any as number;
+// CHECK-NEXT:     {
+// CHECK-NEXT:       "type": "ExpressionStatement",
+// CHECK-NEXT:       "expression": {
+// CHECK-NEXT:         "type": "TSAsExpression",
+// CHECK-NEXT:         "expression": {
+// CHECK-NEXT:           "type": "TSAsExpression",
+// CHECK-NEXT:           "expression": {
+// CHECK-NEXT:             "type": "Identifier",
+// CHECK-NEXT:             "name": "x"
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "typeAnnotation": {
+// CHECK-NEXT:             "type": "TSAnyKeyword"
+// CHECK-NEXT:           }
+// CHECK-NEXT:         },
+// CHECK-NEXT:         "typeAnnotation": {
+// CHECK-NEXT:           "type": "TSNumberKeyword"
+// CHECK-NEXT:         }
+// CHECK-NEXT:       },
+// CHECK-NEXT:       "directive": null
+// CHECK-NEXT:     }
 // CHECK-NEXT:   ]
+
 // CHECK-NEXT: }
diff --git a/tools/hermes-parser/HermesParserJSSerializer.cpp b/tools/hermes-parser/HermesParserJSSerializer.cpp
index aeea388ce58..49b7aa24909 100644
--- a/tools/hermes-parser/HermesParserJSSerializer.cpp
+++ b/tools/hermes-parser/HermesParserJSSerializer.cpp
@@ -255,6 +255,11 @@ class HermesParserJSSerializer {
     serializeToken(token, TokenType::Keyword); \
     break;
 
+#define IDENT_OP(NAME, ...)                       \
+  case TokenKind::NAME:                           \
+    serializeToken(token, TokenType::Punctuator); \
+    break;
+
 #include "hermes/Parser/TokenKinds.def"
 
         // Exclude EOF token