Skip to content

Commit

Permalink
Convert 'as' to a new token
Browse files Browse the repository at this point in the history
Summary:
Fixes the parsing of multiple TypeScript as expressions in a row.
Previously `x as any as number` was parsed as `x as (any as number)`,
instead of `(x as any) as number`.

This was because we customized the precedence calculation for `as`
in the function getPrecedenceExcept,
but this custom precedence was not saved in the stack.
Later when we had an as expression in the stack,
we were not getting the right precedence level for it when doing
`getPrecedence(stack.back().opKind)`.

Fix this by checking for `identifier` specially and converting
the `as` identifier to a new token kind, `as_operator`, before pushing
onto the stack.
This can be extended to implement `satisfies` as well.

Reviewed By: tmikov

Differential Revision: D49700751

fbshipit-source-id: 68d4fd7552cb90c3196d546c53989a5f29ba7add
  • Loading branch information
avp authored and facebook-github-bot committed Oct 3, 2023
1 parent 66d3f87 commit 07604e3
Show file tree
Hide file tree
Showing 6 changed files with 72 additions and 21 deletions.
15 changes: 15 additions & 0 deletions include/hermes/Parser/JSLexer.h
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,11 @@ class Token {
void setPunctuator(TokenKind kind) {
kind_ = kind;
}
/// Set the TokenKind to a given IDENT_OP token.
/// Used when converting identifiers to the corresponding ident op token.
void setIdentOp(TokenKind kind) {
kind_ = kind;
}
void setEof() {
kind_ = TokenKind::eof;
}
Expand Down Expand Up @@ -722,6 +727,16 @@ class JSLexer {
}
};

/// Convert the current token to an identifier-operator token.
/// Identifiers for new TokenKinds can be added here.
/// \pre the current token is an identifier which is an IDENT_OP operator.
/// \param the IDENT_OP token kind to convert to.
void convertCurTokenToIdentOp(TokenKind kind) {
assert(token_.getKind() == TokenKind::identifier);
assert(token_.getIdentifier()->str() == tokenKindStr(kind));
token_.setIdentOp(kind);
}

private:
/// Initialize the storage with the characters between \p begin and \p end.
inline void initStorageWith(const char *begin, const char *end);
Expand Down
7 changes: 7 additions & 0 deletions include/hermes/Parser/TokenKinds.def
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,10 @@
#define TEMPLATE(name, str) TOK(name, str)
#endif

#ifndef IDENT_OP
#define IDENT_OP(name, str, precedence) TOK(name, str)
#endif

#ifndef RANGE_MARKER
#define RANGE_MARKER(name) TOK(name, "<" #name ">")
#endif
Expand Down Expand Up @@ -155,6 +159,8 @@ PUNCTUATOR(questionquestionequal, "\?\?=")
PUNCTUATOR(caretequal, "^=")
PUNCTUATOR(equalgreater, "=>")

IDENT_OP(as_operator, "as", 8)

TOK(numeric_literal, "number")
TOK(string_literal, "string")
TOK(regexp_literal, "regexp")
Expand All @@ -175,4 +181,5 @@ RANGE_MARKER(_last_token)
#undef PUNCTUATOR_FLOW
#undef BINOP
#undef TEMPLATE
#undef IDENT_OP
#undef RANGE_MARKER
37 changes: 17 additions & 20 deletions lib/Parser/JSParserImpl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3978,6 +3978,7 @@ inline unsigned getPrecedence(TokenKind kind) {
static const unsigned precedence[] = {
#define TOK(...) 0,
#define BINOP(name, str, precedence) precedence,
#define IDENT_OP(name, str, precedence) precedence,

// There are two reserved words that are binary operators.
#define RESWORD(name) \
Expand All @@ -4000,22 +4001,22 @@ inline bool isLeftAssoc(TokenKind kind) {
/// except, in which case return 0.
/// \param asIdent if not null, the "as" UniqueString used to parse TS
/// AsExpressions.
inline unsigned getPrecedenceExcept(
const Token *token,
TokenKind except,
UniqueString *asIdent) {
inline unsigned getPrecedenceExcept(const Token *token, TokenKind except) {
const TokenKind kind = token->getKind();
#if HERMES_PARSE_TS
// 'as' has the same precedence as 'in' in TS.
if (LLVM_UNLIKELY(kind == TokenKind::identifier) &&
LLVM_UNLIKELY(token->getIdentifier() == asIdent)) {
return getPrecedence(TokenKind::rw_in);
}
#endif
return LLVM_LIKELY(kind != except) ? getPrecedence(kind) : 0;
}
} // namespace

inline void JSParserImpl::convertIdentOpIfPossible() {
#if HERMES_PARSE_TS
if (LLVM_UNLIKELY(tok_->getKind() == TokenKind::identifier) &&
context_.getParseTS()) {
if (tok_->getIdentifier() == asIdent_)
lexer_.convertCurTokenToIdentOp(TokenKind::as_operator);
}
#endif
};

Optional<ESTree::Node *> JSParserImpl::parseBinaryExpression(Param param) {
// The stack can never go deeper than the number of precedence levels,
// unless we have a right-associative operator.
Expand Down Expand Up @@ -4074,9 +4075,7 @@ Optional<ESTree::Node *> JSParserImpl::parseBinaryExpression(Param param) {
endLoc,
new (context_) ESTree::LogicalExpressionNode(left, right, opIdent));
#if HERMES_PARSE_TS
} else if (LLVM_UNLIKELY(opKind == TokenKind::identifier)) {
// The only identifier used as a binary operator is 'as' in TS
// and it would only have been pushed if TS parsing was enabled.
} else if (LLVM_UNLIKELY(opKind == TokenKind::as_operator)) {
return setLocation(
startLoc,
endLoc,
Expand Down Expand Up @@ -4129,12 +4128,10 @@ Optional<ESTree::Node *> JSParserImpl::parseBinaryExpression(Param param) {
topExpr = optExpr.getValue();
}
SMLoc topExprEndLoc = getPrevTokenEndLoc();
convertIdentOpIfPossible();

// While the current token is a binary operator.
while (unsigned precedence = getPrecedenceExcept(
tok_,
exceptKind,
HERMES_PARSE_TS && context_.getParseTS() ? asIdent_ : nullptr)) {
while (unsigned precedence = getPrecedenceExcept(tok_, exceptKind)) {
// If the next operator has no greater precedence than the operator on the
// stack, pop the stack, creating a new binary expression.
while (!stack.empty() && precedence <= getPrecedence(stack.back().opKind)) {
Expand Down Expand Up @@ -4166,8 +4163,7 @@ Optional<ESTree::Node *> JSParserImpl::parseBinaryExpression(Param param) {

topExprStartLoc = tok_->getStartLoc();
#if HERMES_PARSE_TS
if (context_.getParseTS() &&
LLVM_UNLIKELY(stack.back().opKind == TokenKind::identifier)) {
if (LLVM_UNLIKELY(stack.back().opKind == TokenKind::as_operator)) {
auto optRightExpr = parseTypeAnnotationTS();
if (!optRightExpr)
return None;
Expand All @@ -4186,6 +4182,7 @@ Optional<ESTree::Node *> JSParserImpl::parseBinaryExpression(Param param) {
}

topExprEndLoc = getPrevTokenEndLoc();
convertIdentOpIfPossible();
}

// We have consumed all binary operators. Pop the stack, creating expressions.
Expand Down
5 changes: 5 additions & 0 deletions lib/Parser/JSParserImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -900,6 +900,11 @@ class JSParserImpl {
Optional<ESTree::Node *> parsePostfixExpression();
Optional<ESTree::Node *> parseUnaryExpression();

/// Convert identifiers to the operator they represent.
/// Called after each parseUnaryExpression to change identifiers that might be
/// operators into their corresponding IDENT_OP tokens.
inline void convertIdentOpIfPossible();

/// Parse a binary expression using a precedence table, in order to decrease
/// recursion depth.
Optional<ESTree::Node *> parseBinaryExpression(Param param);
Expand Down
24 changes: 23 additions & 1 deletion test/Parser/ts/as.ts
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,29 @@ x + y as number;
// CHECK-NEXT: }
// CHECK-NEXT: },
// CHECK-NEXT: "directive": null
// CHECK-NEXT: }
// CHECK-NEXT: },

x as any as number;
// CHECK-NEXT: {
// CHECK-NEXT: "type": "ExpressionStatement",
// CHECK-NEXT: "expression": {
// CHECK-NEXT: "type": "TSAsExpression",
// CHECK-NEXT: "expression": {
// CHECK-NEXT: "type": "TSAsExpression",
// CHECK-NEXT: "expression": {
// CHECK-NEXT: "type": "Identifier",
// CHECK-NEXT: "name": "x"
// CHECK-NEXT: },
// CHECK-NEXT: "typeAnnotation": {
// CHECK-NEXT: "type": "TSAnyKeyword"
// CHECK-NEXT: }
// CHECK-NEXT: },
// CHECK-NEXT: "typeAnnotation": {
// CHECK-NEXT: "type": "TSNumberKeyword"
// CHECK-NEXT: }
// CHECK-NEXT: },
// CHECK-NEXT: "directive": null
// CHECK-NEXT: }
// CHECK-NEXT: ]

// CHECK-NEXT: }
5 changes: 5 additions & 0 deletions tools/hermes-parser/HermesParserJSSerializer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,11 @@ class HermesParserJSSerializer {
serializeToken(token, TokenType::Keyword); \
break;

#define IDENT_OP(NAME, ...) \
case TokenKind::NAME: \
serializeToken(token, TokenType::Punctuator); \
break;

#include "hermes/Parser/TokenKinds.def"

// Exclude EOF token
Expand Down

0 comments on commit 07604e3

Please sign in to comment.