diff --git a/src/Tokenizer.php b/src/Tokenizer.php index 82ace8c..2ac597d 100644 --- a/src/Tokenizer.php +++ b/src/Tokenizer.php @@ -825,9 +825,10 @@ private function createNextToken(string $string, Token|null $previous = null): T $last = strpos($string, "\n"); $type = Token::TOKEN_TYPE_COMMENT; } else { // Comment until closing comment tag - $pos = strpos($string, '*/', 2); - assert($pos !== false); - $last = $pos + 2; + $pos = strpos($string, '*/', 2); + $last = $pos !== false + ? $pos + 2 + : false; $type = Token::TOKEN_TYPE_BLOCK_COMMENT; } diff --git a/tests/SqlFormatterTest.php b/tests/SqlFormatterTest.php index 05dd17f..92dd437 100644 --- a/tests/SqlFormatterTest.php +++ b/tests/SqlFormatterTest.php @@ -18,6 +18,7 @@ use function defined; use function explode; use function file_get_contents; +use function implode; use function pack; use function rtrim; use function sprintf; @@ -41,6 +42,7 @@ public function testFormatHighlight(string $sql, string $html): void } #[DataProvider('formatData')] + #[DataProvider('formatLongConcatData')] public function testFormat(string $sql, string $html): void { $formatter = new SqlFormatter(new NullHighlighter()); @@ -99,13 +101,22 @@ public function testUsePre(): void $this->assertSame($actual, $expected); } + /** @return string[] */ + private static function fileSqlData(): array + { + $contents = file_get_contents(__DIR__ . '/sql.sql'); + assert($contents !== false); + + return explode("\n---\n", rtrim($contents, "\n")); + } + /** @return Generator */ private static function fileDataProvider(string $file): Generator { $contents = file_get_contents(__DIR__ . '/' . $file); assert($contents !== false); $formatHighlightData = explode("\n---\n", rtrim($contents, "\n")); - $sqlData = self::sqlData(); + $sqlData = self::fileSqlData(); if (count($formatHighlightData) !== count($sqlData)) { throw new UnexpectedValueException(sprintf( '"%s" (%d sections) and sql.sql (%d sections) should have the same number of sections', @@ -138,6 +149,23 @@ public static function formatData(): Generator return self::fileDataProvider('format.txt'); } + /** @return Generator */ + public static function formatLongConcatData(): Generator + { + $sqlParts = []; + for ($i = 0; $i < 2_000; $i++) { + $sqlParts[] = 'cast(\'foo' . $i . '\' as blob)'; + } + + $inConcat = 'concat(' . implode(', ', $sqlParts) . ')'; + $outConcat = "concat(\n " . implode(",\n ", $sqlParts) . "\n )"; + + yield 'long concat' => [ + 'select iif(' . $inConcat . ' = ' . $inConcat . ', 10, 20) x', + "select\n iif(\n " . $outConcat . ' = ' . $outConcat . ",\n 10,\n 20\n ) x", + ]; + } + /** @return Generator */ public static function compressData(): Generator { @@ -149,13 +177,4 @@ public static function highlightData(): Generator { return self::fileDataProvider('highlight.html'); } - - /** @return mixed[] */ - private static function sqlData(): array - { - $contents = file_get_contents(__DIR__ . '/sql.sql'); - assert($contents !== false); - - return explode("\n---\n", rtrim($contents, "\n")); - } } diff --git a/tests/TokenizerTest.php b/tests/TokenizerTest.php index 9f82319..f8a767e 100644 --- a/tests/TokenizerTest.php +++ b/tests/TokenizerTest.php @@ -4,13 +4,18 @@ namespace Doctrine\SqlFormatter\Tests; +use Doctrine\SqlFormatter\Cursor; +use Doctrine\SqlFormatter\Token; use Doctrine\SqlFormatter\Tokenizer; -use PHPUnit\Framework\Attributes\DoesNotPerformAssertions; +use Generator; +use PHPUnit\Framework\Attributes\DataProvider; use PHPUnit\Framework\TestCase; use ReflectionClass; use function array_filter; +use function implode; use function preg_match; +use function serialize; use function sort; use function strtoupper; @@ -58,9 +63,131 @@ public function testKeywordsReservedAreSingleUpperWord(): void self::assertSame([], $kwsDiff); } - #[DoesNotPerformAssertions] - public function testThereAreNoRegressions(): void + /** @param list $expectedTokens */ + public static function assertEqualsTokens(array $expectedTokens, Cursor $cursor): void { - (new Tokenizer())->tokenize('*/'); + $tokens = []; + + $cursor = $cursor->subCursor(); + + while ($token = $cursor->next()) { + $tokens[] = $token; + } + + if (serialize($tokens) === serialize($expectedTokens)) { // optimize self::assertEquals() for large inputs + self::assertTrue(true); + } else { + self::assertEquals($expectedTokens, $tokens); + } + } + + /** @param list $expectedTokens */ + #[DataProvider('tokenizeData')] + #[DataProvider('tokenizeLongConcatData')] + public function testTokenize(array $expectedTokens, string $sql): void + { + self::assertEqualsTokens($expectedTokens, (new Tokenizer())->tokenize($sql)); + } + + /** @return Generator */ + public static function tokenizeData(): Generator + { + yield 'empty' => [ + [], + '', + ]; + + yield 'basic' => [ + [ + new Token(Token::TOKEN_TYPE_RESERVED_TOPLEVEL, 'select'), + new Token(Token::TOKEN_TYPE_WHITESPACE, ' '), + new Token(Token::TOKEN_TYPE_NUMBER, '1'), + ], + 'select 1', + ]; + + yield 'there are no regressions' => [ + [ + new Token(Token::TOKEN_TYPE_BOUNDARY, '*'), + new Token(Token::TOKEN_TYPE_BOUNDARY, '/'), + ], + '*/', + ]; + + yield 'unclosed quoted string' => [ + [ + new Token(Token::TOKEN_TYPE_QUOTE, '\'foo...'), + ], + '\'foo...', + ]; + + yield 'unclosed block comment' => [ + [ + new Token(Token::TOKEN_TYPE_BLOCK_COMMENT, '/* foo...'), + ], + '/* foo...', + ]; + } + + /** @return Generator */ + public static function tokenizeLongConcatData(): Generator + { + $count = 2_000; + + $sqlParts = []; + for ($i = 0; $i < $count; $i++) { + $sqlParts[] = 'cast(\'foo' . $i . '\' as blob)'; + } + + $concat = 'concat(' . implode(', ', $sqlParts) . ')'; + $sql = 'select iif(' . $concat . ' = ' . $concat . ', 10, 20) x'; + + $expectedTokens = [ + new Token(Token::TOKEN_TYPE_RESERVED_TOPLEVEL, 'select'), + new Token(Token::TOKEN_TYPE_WHITESPACE, ' '), + new Token(Token::TOKEN_TYPE_WORD, 'iif'), + new Token(Token::TOKEN_TYPE_BOUNDARY, '('), + ]; + + for ($j = 0; $j < 2; $j++) { + if ($j !== 0) { + $expectedTokens[] = new Token(Token::TOKEN_TYPE_WHITESPACE, ' '); + $expectedTokens[] = new Token(Token::TOKEN_TYPE_BOUNDARY, '='); + $expectedTokens[] = new Token(Token::TOKEN_TYPE_WHITESPACE, ' '); + } + + $expectedTokens[] = new Token(Token::TOKEN_TYPE_RESERVED, 'concat'); + $expectedTokens[] = new Token(Token::TOKEN_TYPE_BOUNDARY, '('); + + for ($i = 0; $i < $count; $i++) { + if ($i !== 0) { + $expectedTokens[] = new Token(Token::TOKEN_TYPE_BOUNDARY, ','); + $expectedTokens[] = new Token(Token::TOKEN_TYPE_WHITESPACE, ' '); + } + + $expectedTokens[] = new Token(Token::TOKEN_TYPE_RESERVED, 'cast'); + $expectedTokens[] = new Token(Token::TOKEN_TYPE_BOUNDARY, '('); + $expectedTokens[] = new Token(Token::TOKEN_TYPE_QUOTE, '\'foo' . $i . '\''); + $expectedTokens[] = new Token(Token::TOKEN_TYPE_WHITESPACE, ' '); + $expectedTokens[] = new Token(Token::TOKEN_TYPE_RESERVED, 'as'); + $expectedTokens[] = new Token(Token::TOKEN_TYPE_WHITESPACE, ' '); + $expectedTokens[] = new Token(Token::TOKEN_TYPE_WORD, 'blob'); + $expectedTokens[] = new Token(Token::TOKEN_TYPE_BOUNDARY, ')'); + } + + $expectedTokens[] = new Token(Token::TOKEN_TYPE_BOUNDARY, ')'); + } + + $expectedTokens[] = new Token(Token::TOKEN_TYPE_BOUNDARY, ','); + $expectedTokens[] = new Token(Token::TOKEN_TYPE_WHITESPACE, ' '); + $expectedTokens[] = new Token(Token::TOKEN_TYPE_NUMBER, '10'); + $expectedTokens[] = new Token(Token::TOKEN_TYPE_BOUNDARY, ','); + $expectedTokens[] = new Token(Token::TOKEN_TYPE_WHITESPACE, ' '); + $expectedTokens[] = new Token(Token::TOKEN_TYPE_NUMBER, '20'); + $expectedTokens[] = new Token(Token::TOKEN_TYPE_BOUNDARY, ')'); + $expectedTokens[] = new Token(Token::TOKEN_TYPE_WHITESPACE, ' '); + $expectedTokens[] = new Token(Token::TOKEN_TYPE_WORD, 'x'); + + yield 'long concat' => [$expectedTokens, $sql]; } } diff --git a/tests/clihighlight.txt b/tests/clihighlight.txt index c98a763..4d4386c 100644 --- a/tests/clihighlight.txt +++ b/tests/clihighlight.txt @@ -13,6 +13,8 @@ ORDER BY COUNT(order_id) DESC; --- + +--- UPDATE customers SET diff --git a/tests/compress.txt b/tests/compress.txt index 5805fce..fd507ba 100644 --- a/tests/compress.txt +++ b/tests/compress.txt @@ -1,4 +1,6 @@ SELECT customer_id, customer_name, COUNT(order_id) as total FROM customers INNER JOIN orders ON customers.customer_id = orders.customer_id GROUP BY customer_id, customer_name HAVING COUNT(order_id) > 5 ORDER BY COUNT(order_id) DESC; +--- + --- UPDATE customers SET totalorders = ordersummary.total FROM (SELECT customer_id, count(order_id) As total FROM orders GROUP BY customer_id) As ordersummary WHERE customers.customer_id = ordersummary.customer_id --- diff --git a/tests/format-highlight.html b/tests/format-highlight.html index 8d6521c..7c9b1d9 100644 --- a/tests/format-highlight.html +++ b/tests/format-highlight.html @@ -13,6 +13,8 @@ ORDER BY COUNT(order_id) DESC; --- +

+---
 
UPDATE
   customers
 SET
diff --git a/tests/format.txt b/tests/format.txt
index a41ffd3..ffa5cb7 100644
--- a/tests/format.txt
+++ b/tests/format.txt
@@ -13,6 +13,8 @@ HAVING
 ORDER BY
   COUNT(order_id) DESC;
 ---
+
+---
 UPDATE
   customers
 SET
diff --git a/tests/highlight.html b/tests/highlight.html
index 6eb6da8..6337210 100644
--- a/tests/highlight.html
+++ b/tests/highlight.html
@@ -4,6 +4,8 @@
 HAVING COUNT(order_id) > 5
 ORDER BY COUNT(order_id) DESC;
--- +

+---
 
UPDATE customers
         SET totalorders = ordersummary.total
         FROM (SELECT customer_id, count(order_id) As total
diff --git a/tests/sql.sql b/tests/sql.sql
index 267a504..d57da6e 100644
--- a/tests/sql.sql
+++ b/tests/sql.sql
@@ -3,6 +3,8 @@ FROM customers INNER JOIN orders ON customers.customer_id = orders.customer_id
 GROUP BY customer_id, customer_name
 HAVING COUNT(order_id) > 5
 ORDER BY COUNT(order_id) DESC;
+---
+
 ---
 UPDATE customers
         SET totalorders = ordersummary.total