Skip to content

Commit

Permalink
Use new Pure builtins to make Regex HSL Pure.
Browse files Browse the repository at this point in the history
Summary: This also allows us to delete HH\Lib\_Private\PHPWarningSuppressor.

Reviewed By: DavidSnider

Differential Revision: D25073943

fbshipit-source-id: 35e46cd6be3dac87a39739d6daf91098af7f2f6d
  • Loading branch information
alexeyt authored and facebook-github-bot committed Dec 14, 2020
1 parent 0ab94f7 commit 3f7a0e1
Show file tree
Hide file tree
Showing 3 changed files with 58 additions and 110 deletions.
22 changes: 0 additions & 22 deletions src/private.php
Original file line number Diff line number Diff line change
Expand Up @@ -56,28 +56,6 @@ function boolval(mixed $val): bool {
const string ALPHABET_ALPHANUMERIC =
'0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ';

/**
 * Scoped guard that switches PHP error reporting off and puts it back later.
 *
 * Meant to be created in a `using` statement: construction calls
 * `error_reporting(0)` and remembers the value that call returns; disposal
 * hands that remembered value back to `error_reporting()`. This lets a caller
 * quiet PHP builtins that emit warnings on failure for a stretch of code
 * without resorting to PHP's `@` annotation.
 */
final class PHPWarningSuppressor implements \IDisposable {

  /** Value returned by `error_reporting(0)` when this guard was created. */
  private int $levelBeforeSuppression;

  /**
   * Turns reporting off and records what `error_reporting` returned.
   */
  public function __construct() {
    /* HH_FIXME[2049] __PHPStdLib */
    /* HH_FIXME[4107] __PHPStdLib */
    $level_on_entry = \error_reporting(0);
    $this->levelBeforeSuppression = $level_on_entry;
  }

  /**
   * Passes the recorded value back to `error_reporting`.
   */
  public function __dispose(): void {
    $level_to_restore = $this->levelBeforeSuppression;
    /* HH_FIXME[2049] __PHPStdLib */
    /* HH_FIXME[4107] __PHPStdLib */
    \error_reporting($level_to_restore);
  }
}

/**
* Stop eager execution of an async function.
*
Expand Down
57 changes: 14 additions & 43 deletions src/regex/private.php
Original file line number Diff line number Diff line change
Expand Up @@ -13,56 +13,27 @@
use namespace HH\Lib\{Regex, Str};

/**
* Temporary stand-in for native match function to be implemented in T30991246.
* Returns the first match found in `$haystack` given the regex pattern `$pattern`
* and an optional offset at which to start the search.
* and an offset at which to start the search. The offset is updated to point
* to the start of the match.
*
* Throws Invariant[Violation]Exception if `$offset` is not within plus/minus the length of `$haystack`
* Returns null, or a tuple of
* first,
* a Match containing
* - the entire matching string, at key 0,
* - the results of unnamed capture groups, at integer keys corresponding to
* the groups' occurrence within the pattern, and
* - the results of named capture groups, at string keys matching their respective names,
* and second,
* the integer offset at which this first match occurs in the haystack string.
* Returns null, or a Match containing
* - the entire matching string, at key 0,
* - the results of unnamed capture groups, at integer keys corresponding to
* the groups' occurrence within the pattern, and
* - the results of named capture groups, at string keys matching their respective names,
*/
<<__Rx>> // not pure due to preg_match_with_matches + preg_last_error
<<__Pure>>
function regex_match<T as Regex\Match>(
string $haystack,
Regex\Pattern<T> $pattern,
// NOTE(review): this listing is a rendered commit diff, so removed and added
// lines sit side by side without +/- markers. The commit summary says
// PHPWarningSuppressor is deleted, so the signature tail and body that use it
// are presumably the removed side, and the `inout` / `?T` tail the added side.
// First signature tail: offset taken by value, result is a (match, offset)
// tuple or null.
int $offset = 0,
): ?(T, int) {
/* HH_FIXME[4200] keep suppressing warnings from bad callers */
/* HH_FIXME[4387] reported here as of 2020.09.21, hack v4.51.0 */
using new PHPWarningSuppressor();
// Second signature tail: offset is passed by `inout`, result is the match
// alone or null.
inout int $offset,
): ?T {
// $offset is replaced by whatever validate_offset returns for it and the
// haystack length (the helper is defined outside this view).
$offset = validate_offset($offset, Str\length($haystack));
// Body that calls preg_match_with_matches directly, asking for offset capture
// and starting the search at $offset.
$match = darray[];
/* HH_FIXME[2049] __PHPStdLib */
/* HH_FIXME[4107] __PHPStdLib */
/* HH_FIXME[4200] Rx error without deregister_phpstdlib */
$status = \preg_match_with_matches(
/* HH_FIXME[4110] */ $pattern,
$haystack,
inout $match,
\PREG_FB__PRIVATE__HSL_IMPL | \PREG_OFFSET_CAPTURE,
$offset,
);
if ($status === 1) {
// Each captured entry is indexed here as [0] => value, [1] => offset; only
// the value is kept per key.
$match_out = darray[];
foreach ($match as $key => $value) {
$match_out[$key] = $value[0];
}
// Offset recorded for key 0 (the entire match) is returned with the captures.
$offset_out = $match[0][1];
/* HH_FIXME[4110] Native function won't have this problem */
return tuple($match_out, $offset_out);
} else if ($status === 0) {
// A status of 0 yields null.
return null;
} else {
// Any other status is surfaced as a Regex\Exception built from the pattern
// and preg_last_error().
/* HH_FIXME[2049] __PHPStdLib */
/* HH_FIXME[4107] __PHPStdLib */
/* HH_FIXME[4200] Rx error without deregister_phpstdlib */
throw new Regex\Exception($pattern, \preg_last_error());
// Body that delegates to _Regex\match: it yields a (matches, error) pair and
// receives $offset by `inout`. A non-null error becomes a Regex\Exception;
// otherwise the matches value is returned as-is.
list ($matches, $error) = _Regex\match($haystack, $pattern, inout $offset);
if ($error is nonnull) {
throw new Regex\Exception($pattern, $error);
}
return $matches;
}
89 changes: 44 additions & 45 deletions src/regex/regex.php
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,13 @@
* the groups' occurrence within the pattern, and
* - the results of named capture groups, at string keys matching their respective names.
*/
<<__Rx>>
<<__Pure>>
function first_match<T as Match>(
string $haystack,
Pattern<T> $pattern,
int $offset = 0,
): ?T {
// NOTE(review): rendered commit diff — the two attribute lines above and the
// two `return` lines below are before/after versions of the same line, shown
// without +/- markers; presumably the second of each pair is the post-commit
// text.
// First form: index element 0 of the value regex_match returns, falling back
// to null.
return _Private\regex_match($haystack, $pattern, $offset)[0] ?? null;
// Second form: return regex_match's result directly; $offset is handed over
// by `inout` and its updated value is not read afterwards.
return _Private\regex_match($haystack, $pattern, inout $offset);
}

/**
Expand All @@ -40,7 +40,7 @@ function first_match<T as Match>(
*
* Throws Invariant[Violation]Exception if `$offset` is not within plus/minus the length of `$haystack`.
*/
<<__Rx>>
<<__Pure>>
function every_match<T as Match>(
string $haystack,
Pattern<T> $pattern,
Expand All @@ -49,21 +49,19 @@ function every_match<T as Match>(
$haystack_length = Str\length($haystack);
$result = vec[];
while (true) {
$match = _Private\regex_match($haystack, $pattern, $offset);
$match = _Private\regex_match($haystack, $pattern, inout $offset);
if ($match === null) {
break;
}
$captures = $match[0];
$result[] = $captures;
$match_begin = $match[1];
$match_length = Str\length(Shapes::at($captures, 0) as string);
$result[] = $match;
$match_length = Str\length(Shapes::at($match, 0) as string);
if ($match_length === 0) {
$offset = $match_begin + 1;
$offset++;
if ($offset > $haystack_length) {
break;
}
} else {
$offset = $match_begin + $match_length;
$offset += $match_length;
}
}
return $result;
Expand All @@ -76,13 +74,13 @@ function every_match<T as Match>(
*
* Throws Invariant[Violation]Exception if `$offset` is not within plus/minus the length of `$haystack`.
*/
<<__Rx>>
<<__Pure>>
function matches(
string $haystack,
Pattern<Match> $pattern,
int $offset = 0,
): bool {
// NOTE(review): rendered commit diff — the two attribute lines above and the
// two `return` lines below are before/after versions of the same line, shown
// without +/- markers; presumably the second of each pair is the post-commit
// text.
// Both forms report true exactly when regex_match returns something other
// than null; they differ only in passing $offset by value or by `inout`.
return _Private\regex_match($haystack, $pattern, $offset) !== null;
return _Private\regex_match($haystack, $pattern, inout $offset) !== null;
}

/**
Expand All @@ -93,31 +91,36 @@ function matches(
*
* Throws Invariant[Violation]Exception if `$offset` is not within plus/minus the length of `$haystack`.
*/
<<__Pure>>
function replace(
string $haystack,
Pattern<Match> $pattern,
string $replacement,
int $offset = 0,
): string {
// NOTE(review): this listing is a rendered commit diff; removed and added
// lines appear together without +/- markers. The commit summary says
// PHPWarningSuppressor is deleted, so the preg_replace / preg_last_error
// lines below are presumably the removed side and the _Private\_Regex\replace
// lines the added side. The next two comment paragraphs are the before and
// after wording of the same explanation.
// replace is the only one of these functions that calls into a preg
// function other than preg_match. It needs to call into preg_replace
// to be able to handle backreferencing in the `$replacement` string.
// preg_replace does not support offsets, so we handle them ourselves,
// consistently with _Private\regex_match.
// replace is the only one of these functions that calls into a native
// helper other than match. It needs its own helper to be able to handle
// backreferencing in the `$replacement` string. Our offset handling is
// trivial so we do it here rather than pushing it down into the helper.
$offset = _Private\validate_offset($offset, Str\length($haystack));

// Offset 0: there is no prefix to set aside, so the helper runs on the whole
// haystack and its result is returned directly.
if ($offset === 0) {
list ($result, $error) =
_Private\_Regex\replace($haystack, $pattern, $replacement);
if ($error is nonnull) {
// NOTE(review): the preg_last_error throw further down names the class as
// plain `Exception`, while this line uses `Regex\Exception`. The namespace and
// `use` declarations of this file are not visible here — confirm the
// qualified name resolves to the intended class.
throw new Regex\Exception($pattern, $error);
}
return $result as nonnull;
}

// Non-zero offset: cut the haystack at $offset. The part before the cut is
// carried through unchanged; only the part after it is run through the
// replacement.
$haystack1 = Str\slice($haystack, 0, $offset);
$haystack2 = Str\slice($haystack, $offset);

using new _Private\PHPWarningSuppressor();
/* HH_FIXME[2049] __PHPStdLib */
/* HH_FIXME[4107] __PHPStdLib */
$haystack3 = \preg_replace($pattern, $replacement, $haystack2);
if ($haystack3 === null) {
/* HH_FIXME[2049] __PHPStdLib */
/* HH_FIXME[4107] __PHPStdLib */
throw new Exception($pattern, \preg_last_error());
list ($result, $error) =
_Private\_Regex\replace($haystack2, $pattern, $replacement);
if ($error is nonnull) {
throw new Regex\Exception($pattern, $error);
}
return $haystack1.$haystack3;
// NOTE(review): unlike the offset-0 path, $result is concatenated here without
// `as nonnull` — confirm the helper cannot return a null result when $error
// is null.
return $haystack1 . $result;
}

/**
Expand All @@ -128,7 +131,7 @@ function replace(
*
* Throws Invariant[Violation]Exception if `$offset` is not within plus/minus the length of `$haystack`.
*/
<<__Rx, __AtMostRxAsArgs>>
<<__Pure, __AtMostRxAsArgs>>
function replace_with<T as Match>(
string $haystack,
Pattern<T> $pattern,
Expand All @@ -139,22 +142,20 @@ function replace_with<T as Match>(
$result = Str\slice($haystack, 0, 0);
$match_end = 0;
while (true) {
$match = _Private\regex_match($haystack, $pattern, $offset);
$match = _Private\regex_match($haystack, $pattern, inout $offset);
if ($match === null) {
break;
}
$captures = $match[0];
$match_begin = $match[1];
// Copy anything between the previous match and this one
$result .= Str\slice($haystack, $match_end, $match_begin - $match_end);
$result .= $replace_func($captures);
$match_length = Str\length(Shapes::at($captures, 0) as string);
$match_end = $match_begin + $match_length;
$result .= Str\slice($haystack, $match_end, $offset - $match_end);
$result .= $replace_func($match);
$match_length = Str\length(Shapes::at($match, 0) as string);
$match_end = $offset + $match_length;
if ($match_length === 0) {
// To get the next match (and avoid looping forever), need to skip forward
// before searching again
// Note that `$offset` is for searching and `$match_end` is for copying
$offset = $match_begin + 1;
$offset++;
if ($offset > $haystack_length) {
break;
}
Expand All @@ -176,7 +177,7 @@ function replace_with<T as Match>(
*
* Throws Invariant[Violation]Exception if `$limit` < 2.
*/
<<__Rx>>
<<__Pure>>
function split(
string $haystack,
Pattern<Match> $delimiter,
Expand All @@ -195,27 +196,25 @@ function split(
$offset = 0;
$match_end = 0;
$count = 1;
$match = _Private\regex_match($haystack, $delimiter, $offset);
$match = _Private\regex_match($haystack, $delimiter, inout $offset);
while ($match && $count < $limit) {
$captures = $match[0];
$match_begin = $match[1];
// Copy anything between the previous match and this one
$result[] = Str\slice($haystack, $match_end, $match_begin - $match_end);
$match_length = Str\length(Shapes::at($captures, 0) as string);
$match_end = $match_begin + $match_length;
$result[] = Str\slice($haystack, $match_end, $offset - $match_end);
$match_length = Str\length(Shapes::at($match, 0) as string);
$match_end = $offset + $match_length;
if ($match_length === 0) {
// To get the next match (and avoid looping forever), need to skip forward
// before searching again
// Note that `$offset` is for searching and `$match_end` is for copying
$offset = $match_begin + 1;
$offset++;
if ($offset > $haystack_length) {
break;
}
} else {
$offset = $match_end;
}
$count++;
$match = _Private\regex_match($haystack, $delimiter, $offset);
$match = _Private\regex_match($haystack, $delimiter, inout $offset);
}
$result[] = Str\slice($haystack, $match_end);
return $result;
Expand Down

0 comments on commit 3f7a0e1

Please sign in to comment.