From 3f7a0e1226c53fa1d9df13218bdbabcb6bf1973d Mon Sep 17 00:00:00 2001 From: Alexey Toptygin Date: Mon, 14 Dec 2020 11:21:59 -0800 Subject: [PATCH] Use new Pure builtins to make Regex HSL Pure. Summary: This also allows us to delete HH\Lib\_Private\PHPWarningSuppressor. Reviewed By: DavidSnider Differential Revision: D25073943 fbshipit-source-id: 35e46cd6be3dac87a39739d6daf91098af7f2f6d --- src/private.php | 22 ----------- src/regex/private.php | 57 +++++++-------------------- src/regex/regex.php | 89 +++++++++++++++++++++---------------------- 3 files changed, 58 insertions(+), 110 deletions(-) diff --git a/src/private.php b/src/private.php index 47464957..44642dc7 100644 --- a/src/private.php +++ b/src/private.php @@ -56,28 +56,6 @@ function boolval(mixed $val): bool { const string ALPHABET_ALPHANUMERIC = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'; -/** - * Many PHP builtins emit warnings to stderr when they fail. This - * class allows us to squash warnings for a time without using PHP's - * `@` annotation. - */ -final class PHPWarningSuppressor implements \IDisposable { - - private int $warningLevel; - - public function __construct() { - /* HH_FIXME[2049] __PHPStdLib */ - /* HH_FIXME[4107] __PHPStdLib */ - $this->warningLevel = \error_reporting(0); - } - - public function __dispose(): void { - /* HH_FIXME[2049] __PHPStdLib */ - /* HH_FIXME[4107] __PHPStdLib */ - \error_reporting($this->warningLevel); - } -} - /** * Stop eager execution of an async function. * diff --git a/src/regex/private.php b/src/regex/private.php index 2cdeb5b7..125fe78b 100644 --- a/src/regex/private.php +++ b/src/regex/private.php @@ -13,56 +13,27 @@ use namespace HH\Lib\{Regex, Str}; /** - * Temporary stand-in for native match function to be implemented in T30991246. * Returns the first match found in `$haystack` given the regex pattern `$pattern` - * and an optional offset at which to start the search. + * and an offset at which to start the search. 
The offset is updated to point + * to the start of the match. * * Throws Invariant[Violation]Exception if `$offset` is not within plus/minus the length of `$haystack` - * Returns null, or a tuple of - * first, - * a Match containing - * - the entire matching string, at key 0, - * - the results of unnamed capture groups, at integer keys corresponding to - * the groups' occurrence within the pattern, and - * - the results of named capture groups, at string keys matching their respective names, - * and second, - * the integer offset at which this first match occurs in the haystack string. + * Returns null, or a Match containing + * - the entire matching string, at key 0, + * - the results of unnamed capture groups, at integer keys corresponding to + * the groups' occurrence within the pattern, and + * - the results of named capture groups, at string keys matching their respective names. */ -<<__Rx>> // not pure due to preg_match_with_matches + preg_last_error +<<__Pure>> function regex_match( string $haystack, Regex\Pattern $pattern, - int $offset = 0, -): ?(T, int) { - /* HH_FIXME[4200] keep suppressing warnings from bad callers */ - /* HH_FIXME[4387] reported here as of 2020.09.21, hack v4.51.0 */ - using new PHPWarningSuppressor(); + inout int $offset, +): ?T { $offset = validate_offset($offset, Str\length($haystack)); - $match = darray[]; - /* HH_FIXME[2049] __PHPStdLib */ - /* HH_FIXME[4107] __PHPStdLib */ - /* HH_FIXME[4200] Rx error without deregister_phpstdlib */ - $status = \preg_match_with_matches( - /* HH_FIXME[4110] */ $pattern, - $haystack, - inout $match, - \PREG_FB__PRIVATE__HSL_IMPL | \PREG_OFFSET_CAPTURE, - $offset, - ); - if ($status === 1) { - $match_out = darray[]; - foreach ($match as $key => $value) { - $match_out[$key] = $value[0]; - } - $offset_out = $match[0][1]; - /* HH_FIXME[4110] Native function won't have this problem */ - return tuple($match_out, $offset_out); - } else if ($status === 0) { - return null; - } else { - /* HH_FIXME[2049] 
__PHPStdLib */ - /* HH_FIXME[4107] __PHPStdLib */ - /* HH_FIXME[4200] Rx error without deregister_phpstdlib */ - throw new Regex\Exception($pattern, \preg_last_error()); + list ($matches, $error) = _Regex\match($haystack, $pattern, inout $offset); + if ($error is nonnull) { + throw new Regex\Exception($pattern, $error); } + return $matches; } diff --git a/src/regex/regex.php b/src/regex/regex.php index a1aaef6c..2199e257 100644 --- a/src/regex/regex.php +++ b/src/regex/regex.php @@ -24,13 +24,13 @@ * the groups' occurrence within the pattern, and * - the results of named capture groups, at string keys matching their respective names. */ -<<__Rx>> +<<__Pure>> function first_match( string $haystack, Pattern $pattern, int $offset = 0, ): ?T { - return _Private\regex_match($haystack, $pattern, $offset)[0] ?? null; + return _Private\regex_match($haystack, $pattern, inout $offset); } /** @@ -40,7 +40,7 @@ function first_match( * * Throws Invariant[Violation]Exception if `$offset` is not within plus/minus the length of `$haystack`. */ -<<__Rx>> +<<__Pure>> function every_match( string $haystack, Pattern $pattern, @@ -49,21 +49,19 @@ function every_match( $haystack_length = Str\length($haystack); $result = vec[]; while (true) { - $match = _Private\regex_match($haystack, $pattern, $offset); + $match = _Private\regex_match($haystack, $pattern, inout $offset); if ($match === null) { break; } - $captures = $match[0]; - $result[] = $captures; - $match_begin = $match[1]; - $match_length = Str\length(Shapes::at($captures, 0) as string); + $result[] = $match; + $match_length = Str\length(Shapes::at($match, 0) as string); if ($match_length === 0) { - $offset = $match_begin + 1; + $offset++; if ($offset > $haystack_length) { break; } } else { - $offset = $match_begin + $match_length; + $offset += $match_length; } } return $result; @@ -76,13 +74,13 @@ function every_match( * * Throws Invariant[Violation]Exception if `$offset` is not within plus/minus the length of `$haystack`. 
*/ -<<__Rx>> +<<__Pure>> function matches( string $haystack, Pattern $pattern, int $offset = 0, ): bool { - return _Private\regex_match($haystack, $pattern, $offset) !== null; + return _Private\regex_match($haystack, $pattern, inout $offset) !== null; } /** @@ -93,31 +91,36 @@ function matches( * * Throws Invariant[Violation]Exception if `$offset` is not within plus/minus the length of `$haystack`. */ +<<__Pure>> function replace( string $haystack, Pattern $pattern, string $replacement, int $offset = 0, ): string { - // replace is the only one of these functions that calls into a preg - // function other than preg_match. It needs to call into preg_replace - // to be able to handle backreferencing in the `$replacement` string. - // preg_replace does not support offsets, so we handle them ourselves, - // consistently with _Private\regex_match. + // replace is the only one of these functions that calls into a native + // helper other than match. It needs its own helper to be able to handle + // backreferencing in the `$replacement` string. Our offset handling is + // trivial so we do it here rather than pushing it down into the helper. 
$offset = _Private\validate_offset($offset, Str\length($haystack)); + + if ($offset === 0) { + list ($result, $error) = + _Private\_Regex\replace($haystack, $pattern, $replacement); + if ($error is nonnull) { + throw new Regex\Exception($pattern, $error); + } + return $result as nonnull; + } + $haystack1 = Str\slice($haystack, 0, $offset); $haystack2 = Str\slice($haystack, $offset); - - using new _Private\PHPWarningSuppressor(); - /* HH_FIXME[2049] __PHPStdLib */ - /* HH_FIXME[4107] __PHPStdLib */ - $haystack3 = \preg_replace($pattern, $replacement, $haystack2); - if ($haystack3 === null) { - /* HH_FIXME[2049] __PHPStdLib */ - /* HH_FIXME[4107] __PHPStdLib */ - throw new Exception($pattern, \preg_last_error()); + list ($result, $error) = + _Private\_Regex\replace($haystack2, $pattern, $replacement); + if ($error is nonnull) { + throw new Regex\Exception($pattern, $error); } - return $haystack1.$haystack3; + return $haystack1 . $result; } /** @@ -128,7 +131,7 @@ function replace( * * Throws Invariant[Violation]Exception if `$offset` is not within plus/minus the length of `$haystack`. 
*/ -<<__Rx, __AtMostRxAsArgs>> +<<__Pure, __AtMostRxAsArgs>> function replace_with( string $haystack, Pattern $pattern, @@ -139,22 +142,20 @@ function replace_with( $result = Str\slice($haystack, 0, 0); $match_end = 0; while (true) { - $match = _Private\regex_match($haystack, $pattern, $offset); + $match = _Private\regex_match($haystack, $pattern, inout $offset); if ($match === null) { break; } - $captures = $match[0]; - $match_begin = $match[1]; // Copy anything between the previous match and this one - $result .= Str\slice($haystack, $match_end, $match_begin - $match_end); - $result .= $replace_func($captures); - $match_length = Str\length(Shapes::at($captures, 0) as string); - $match_end = $match_begin + $match_length; + $result .= Str\slice($haystack, $match_end, $offset - $match_end); + $result .= $replace_func($match); + $match_length = Str\length(Shapes::at($match, 0) as string); + $match_end = $offset + $match_length; if ($match_length === 0) { // To get the next match (and avoid looping forever), need to skip forward // before searching again // Note that `$offset` is for searching and `$match_end` is for copying - $offset = $match_begin + 1; + $offset++; if ($offset > $haystack_length) { break; } @@ -176,7 +177,7 @@ function replace_with( * * Throws Invariant[Violation]Exception if `$limit` < 2. 
*/ -<<__Rx>> +<<__Pure>> function split( string $haystack, Pattern $delimiter, @@ -195,19 +196,17 @@ function split( $offset = 0; $match_end = 0; $count = 1; - $match = _Private\regex_match($haystack, $delimiter, $offset); + $match = _Private\regex_match($haystack, $delimiter, inout $offset); while ($match && $count < $limit) { - $captures = $match[0]; - $match_begin = $match[1]; // Copy anything between the previous match and this one - $result[] = Str\slice($haystack, $match_end, $match_begin - $match_end); - $match_length = Str\length(Shapes::at($captures, 0) as string); - $match_end = $match_begin + $match_length; + $result[] = Str\slice($haystack, $match_end, $offset - $match_end); + $match_length = Str\length(Shapes::at($match, 0) as string); + $match_end = $offset + $match_length; if ($match_length === 0) { // To get the next match (and avoid looping forever), need to skip forward // before searching again // Note that `$offset` is for searching and `$match_end` is for copying - $offset = $match_begin + 1; + $offset++; if ($offset > $haystack_length) { break; } @@ -215,7 +214,7 @@ function split( $offset = $match_end; } $count++; - $match = _Private\regex_match($haystack, $delimiter, $offset); + $match = _Private\regex_match($haystack, $delimiter, inout $offset); } $result[] = Str\slice($haystack, $match_end); return $result;