Skip to content

Commit

Permalink
Update base32 polyfill implementation
Browse files Browse the repository at this point in the history
  • Loading branch information
nyamsprod committed Jan 6, 2024
1 parent f8667e6 commit ede6aca
Show file tree
Hide file tree
Showing 3 changed files with 59 additions and 144 deletions.
153 changes: 35 additions & 118 deletions src/Base32/Base32.php
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,13 @@

use function chr;
use function preg_match;
use function preg_quote;
use function preg_replace;
use function rtrim;
use function str_contains;
use function str_repeat;
use function strlen;
use function strspn;
use function strtoupper;
use function unpack;

Expand All @@ -19,108 +21,17 @@
*
* Based on https://github.com/ChristianRiesen/base32/blob/master/src/Base32.php class
*/
enum Base32
final class Base32
{
case Ascii;
case Hex;

private function alphabet(): string
{
return match ($this) {
self::Ascii => 'ABCDEFGHIJKLMNOPQRSTUVWXYZ234567=',
self::Hex => '0123456789ABCDEFGHIJKLMNOPQRSTUV=',
};
}

private function pattern(): string
{
return match ($this) {
self::Ascii => '/[^A-Z2-7=]/',
self::Hex => '/[^0-9A-V=]/',
};
}

/**
* @return array<array-key, int>
*/
private function mapping(): array
{
return match ($this) {
self::Ascii => [
'=' => 0b00000,
'A' => 0b00000,
'B' => 0b00001,
'C' => 0b00010,
'D' => 0b00011,
'E' => 0b00100,
'F' => 0b00101,
'G' => 0b00110,
'H' => 0b00111,
'I' => 0b01000,
'J' => 0b01001,
'K' => 0b01010,
'L' => 0b01011,
'M' => 0b01100,
'N' => 0b01101,
'O' => 0b01110,
'P' => 0b01111,
'Q' => 0b10000,
'R' => 0b10001,
'S' => 0b10010,
'T' => 0b10011,
'U' => 0b10100,
'V' => 0b10101,
'W' => 0b10110,
'X' => 0b10111,
'Y' => 0b11000,
'Z' => 0b11001,
'2' => 0b11010,
'3' => 0b11011,
'4' => 0b11100,
'5' => 0b11101,
'6' => 0b11110,
'7' => 0b11111,
],
self::Hex => [
'=' => 0b00000,
'0' => 0b00000,
'1' => 0b00001,
'2' => 0b00010,
'3' => 0b00011,
'4' => 0b00100,
'5' => 0b00101,
'6' => 0b00110,
'7' => 0b00111,
'8' => 0b01000,
'9' => 0b01001,
'A' => 0b01010,
'B' => 0b01011,
'C' => 0b01100,
'D' => 0b01101,
'E' => 0b01110,
'F' => 0b01111,
'G' => 0b10000,
'H' => 0b10001,
'I' => 0b10010,
'J' => 0b10011,
'K' => 0b10100,
'L' => 0b10101,
'M' => 0b10110,
'N' => 0b10111,
'O' => 0b11000,
'P' => 0b11001,
'Q' => 0b11010,
'R' => 0b11011,
'S' => 0b11100,
'T' => 0b11101,
'U' => 0b11110,
'V' => 0b11111,
],
};
}

public function encode(string $decoded): string
{
public const PADDING_CHARACTER = '=';
public const ASCII = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ234567';
public const HEX = '0123456789ABCDEFGHIJKLMNOPQRSTUV';

public static function encode(
string $decoded,
string $alphabet = self::ASCII,
string $padding = self::PADDING_CHARACTER
): string {
if ('' === $decoded) {
return '';
}
Expand All @@ -132,7 +43,7 @@ public function encode(string $decoded): string
$len = strlen($decoded);
$decoded .= str_repeat(chr(0), 4);
$chars = (array) unpack('C*', $decoded);
$alphabet = $this->alphabet();
$alphabet .= $padding;

while ($n < $len || 0 !== $bitLen) {
if ($bitLen < 5) {
Expand All @@ -142,7 +53,7 @@ public function encode(string $decoded): string
$val += $chars[$n];
}
$shift = $bitLen - 5;
$encoded .= ($n - (int)($bitLen > 8) > $len && 0 == $val) ? '=' : $alphabet[$val >> $shift];
$encoded .= ($n - (int)($bitLen > 8) > $len && 0 == $val) ? $padding : $alphabet[$val >> $shift];
$val = $val & ((1 << $shift) - 1);
$bitLen -= 5;
}
Expand All @@ -153,8 +64,11 @@ public function encode(string $decoded): string
/**
* @throws Base32Exception if the encoded string is invalid
*/
public function decode(string $encoded): string
{
public static function decode(
string $encoded,
string $alphabet = self::ASCII,
string $padding = self::PADDING_CHARACTER
): string {
if ('' === $encoded) {
return '';
}
Expand All @@ -167,31 +81,34 @@ public function decode(string $encoded): string
throw new Base32Exception('The encoded string length is not a multiple of 8.');
}

if (str_contains(rtrim($encoded, '='), '=')) {
throw new Base32Exception('A padding character is contained in the middle of the encoded string.');
if (strspn($encoded, $alphabet.$padding) !== strlen($encoded)) {
throw new Base32Exception('The encoded string contains characters outside of the base32 alphabet.');
}

if (1 !== preg_match('/^[^=]+((=){3,4}|(=){6}|=)?$/', $encoded)) {
throw new Base32Exception('The encoded string contains an invalid padding length.');
if (str_contains(rtrim($encoded, $padding), $padding)) {
throw new Base32Exception('A padding character is contained in the middle of the encoded string.');
}

if (1 === preg_match($this->pattern(), $encoded)) {
throw new Base32Exception('The encoded string contains characters outside of the base32 '.(Base32::Hex === $this ? 'Extended Hex' : 'US-ASCII').' alphabet.');
if ('' !== $padding && 1 !== preg_match('/^[^'.$padding.']+(('.$padding.'){3,4}|('.$padding.'){6}|'.$padding.')?$/', $encoded)) {
throw new Base32Exception('The encoded string contains an invalid padding length.');
}

return $this->decodeLax($encoded);
return self::decodeLax($encoded, $alphabet, $padding);
}

public function decodeLax(string $encoded): string
{
public static function decodeLax(
string $encoded,
string $alphabet = self::ASCII,
string $padding = self::PADDING_CHARACTER
): string {
$encoded = strtoupper($encoded);
$encoded = preg_replace($this->pattern(), '', $encoded);
$encoded = preg_replace('/[^'.preg_quote($alphabet, '/').$padding.']/', '', $encoded);
if ('' === $encoded || null === $encoded) {
return '';
}

$decoded = '';
$mapping = $this->mapping();
$mapping = array_combine(str_split($alphabet.$padding), [...range(0, 31), 0]);
$len = strlen($encoded);
$n = 0;
$bitLen = 5;
Expand All @@ -202,8 +119,8 @@ public function decodeLax(string $encoded): string
$val = $val << 5;
$bitLen += 5;
$n++;
$pentet = $encoded[$n] ?? '=';
if ('=' === $pentet) {
$pentet = $encoded[$n] ?? $padding;
if ($padding === $pentet) {
$n = $len;
}
$val += $mapping[$pentet];
Expand Down
16 changes: 8 additions & 8 deletions src/Base32/Base32Test.php
Original file line number Diff line number Diff line change
Expand Up @@ -57,21 +57,21 @@ public function it_will_base32_encode_and_decode(string $string): void

#[DataProvider('invalidDecodingSequence')]
#[Test]
public function it_will_return_false_from_invalid_encoded_string_with_base32_decode_function(string $sequence, string $message, int $encoding): void
public function it_will_return_false_from_invalid_encoded_string_with_base32_decode_function(string $sequence, string $message, string $encoding): void
{
self::assertFalse(base32_decode($sequence, $encoding, true));
self::assertFalse(base32_decode($sequence, $encoding, true, true));
}

#[DataProvider('invalidDecodingSequence')]
#[Test]
public function it_will_throw_from_invalid_encoded_string_with_base32_decode_method_on_strict_mode(string $sequence, string $message, int $encoding): void
public function it_will_throw_from_invalid_encoded_string_with_base32_decode_method_on_strict_mode(string $sequence, string $message, string $encoding): void
{
$this->expectException(Base32Exception::class);
$this->expectExceptionMessage($message);

match ($encoding) {
PHP_BASE32_HEX => Base32::Hex->decode($sequence),
default => Base32::Ascii->decode($sequence),
PHP_BASE32_HEX => Base32::decode($sequence, PHP_BASE32_HEX),
default => Base32::decode($sequence, PHP_BASE32_ASCII),
};
}

Expand Down Expand Up @@ -164,19 +164,19 @@ public static function backAndForthDataProvider(): array
}

/**
* @return iterable<string, array{sequence: string, message: string, encoding: int}>
* @return iterable<string, array{sequence: string, message: string, encoding: string}>
*/
public static function invalidDecodingSequence(): iterable
{
yield 'characters outside of base32 extended hex alphabet' => [
'sequence' => 'MZXQ====',
'message' => 'The encoded string contains characters outside of the base32 Extended Hex alphabet.',
'message' => 'The encoded string contains characters outside of the base32 alphabet.',
'encoding' => PHP_BASE32_HEX,
];

yield 'characters outside of base32 us ascii alphabet' => [
'sequence' => '90890808',
'message' => 'The encoded string contains characters outside of the base32 US-ASCII alphabet.',
'message' => 'The encoded string contains characters outside of the base32 alphabet.',
'encoding' => PHP_BASE32_ASCII,
];

Expand Down
34 changes: 16 additions & 18 deletions src/Base32/functions.php
Original file line number Diff line number Diff line change
Expand Up @@ -5,35 +5,33 @@
use Bakame\Aide\Base32\Base32;
use Bakame\Aide\Base32\Base32Exception;

defined('PHP_BASE32_ASCII') || define('PHP_BASE32_ASCII', 1);
defined('PHP_BASE32_HEX') || define('PHP_BASE32_HEX', 2);
defined('PHP_BASE32_ASCII') || define('PHP_BASE32_ASCII', Base32::ASCII);
defined('PHP_BASE32_HEX') || define('PHP_BASE32_HEX', Base32::HEX);

if (!function_exists('base32_encode')) {
function base32_encode(string $decoded, int $encoding = PHP_BASE32_ASCII): string
{
$base32 = match ($encoding) {
PHP_BASE32_HEX => Base32::Hex,
default => Base32::Ascii,
};

return $base32->encode($decoded);
/**
 * Encodes a string into its base32 representation.
 *
 * Procedural wrapper that forwards to Base32::encode().
 *
 * @param string $decoded    the data to encode
 * @param string $alphabet   the alphabet forwarded to Base32::encode(); defaults to PHP_BASE32_ASCII
 * @param bool   $usePadding when true, Base32::PADDING_CHARACTER is forwarded as the padding;
 *                           when false, an empty string is forwarded instead
 */
function base32_encode(
    string $decoded,
    string $alphabet = PHP_BASE32_ASCII,
    bool $usePadding = true
): string {
    // Resolve the padding first so the delegation below stays a plain call.
    $padding = '';
    if ($usePadding) {
        $padding = Base32::PADDING_CHARACTER;
    }

    return Base32::encode($decoded, $alphabet, $padding);
}
}

if (!function_exists('base32_decode')) {
function base32_decode(string $encoded, int $encoding = PHP_BASE32_ASCII, bool $strict = false): string|false
{
$base32 = match ($encoding) {
PHP_BASE32_HEX => Base32::Hex,
default => Base32::Ascii,
};
function base32_decode(
string $encoded,
string $alphabet = PHP_BASE32_ASCII,
bool $usePadding = true,
bool $strict = false
): string|false {

if (!$strict) {
return $base32->decodeLax($encoded);
return Base32::decodeLax($encoded, $alphabet, $usePadding ? Base32::PADDING_CHARACTER : '');
}

try {
return $base32->decode($encoded);
return Base32::decode($encoded, $alphabet, $usePadding ? Base32::PADDING_CHARACTER : '');
} catch (Base32Exception) {
return false;
}
Expand Down

0 comments on commit ede6aca

Please sign in to comment.