Skip to content

Commit

Permalink
Improve base32 package polyfill
Browse files Browse the repository at this point in the history
  • Loading branch information
nyamsprod committed Feb 26, 2024
1 parent 100eb8b commit c5157e3
Show file tree
Hide file tree
Showing 4 changed files with 175 additions and 114 deletions.
144 changes: 144 additions & 0 deletions src/Base32/Base32.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
<?php

declare(strict_types=1);

namespace Bakame\Aide\Base32;

/**
 * Base32 encoder/decoder working with a caller-supplied 32-byte alphabet
 * and a single-byte padding character.
 *
 * Instances are immutable and are created through Base32::new().
 */
final class Base32
{
    /**
     * @param string $alphabet the 32 unique single-byte characters used to represent each 5-bit group
     * @param string $padding  the single-byte character used to pad the encoded output
     *
     * @throws \RuntimeException if the padding or the alphabet is invalid
     */
    private function __construct(
        private readonly string $alphabet,
        private readonly string $padding,
    ) {
        if (in_array($this->padding, ["\r", "\n"], true)) {
            throw new \RuntimeException('The padding character is invalid.');
        }

        if (1 !== strlen($this->padding)) {
            throw new \RuntimeException('The padding character must be one byte long.');
        }

        // Counting unique bytes alone would accept a longer alphabet that contains
        // duplicates, so the raw length is checked as well.
        if (32 !== strlen($this->alphabet) || 32 !== count(array_unique(str_split($this->alphabet)))) {
            throw new \RuntimeException('The alphabet must be 32 bytes long containing unique characters.');
        }

        if (
            str_contains($this->alphabet, "\r") ||
            str_contains($this->alphabet, "\n") ||
            str_contains($this->alphabet, $this->padding)
        ) {
            throw new \RuntimeException('The alphabet contains invalid characters.');
        }
    }

    /**
     * Returns a new instance for the given alphabet and padding character.
     *
     * @throws \RuntimeException if the padding or the alphabet is invalid
     */
    public static function new(string $alphabet, string $padding): self
    {
        return new self($alphabet, $padding);
    }

    /**
     * Decodes a base32 encoded string.
     *
     * In non-strict mode the input is uppercased, unknown characters and misplaced
     * padding characters are removed before decoding. In strict mode any of these
     * irregularities, or an invalid length, triggers an exception.
     *
     * @throws \RuntimeException in strict mode, if the encoded data is malformed
     */
    public function decode(string $encoded, bool $strict = false): string
    {
        if ('' === $encoded) {
            return '';
        }

        if (!$strict) {
            $encoded = strtoupper($encoded);
        }

        if (strtoupper($encoded) !== $encoded) {
            throw new \RuntimeException('The encoded data contains non uppercased characters.');
        }

        $remainder = strlen($encoded) % 8;
        if (0 !== $remainder) {
            if ($strict) {
                throw new \RuntimeException('The encoded data length is invalid.');
            }

            // complete the last block up to the next multiple of 8 characters
            $encoded .= str_repeat($this->padding, 8 - $remainder);
        }

        $characters = $this->alphabet.$this->padding;
        if (strspn($encoded, $characters) !== strlen($encoded)) {
            if ($strict) {
                throw new \RuntimeException('The encoded data contains characters unknown to the alphabet.');
            }
            $encoded = preg_replace('/[^'.preg_quote($characters, '/').']/', '', $encoded);
            if ('' === $encoded || null === $encoded) {
                return '';
            }
        }

        $inside = rtrim($encoded, $this->padding);
        if (str_contains($inside, $this->padding)) {
            if ($strict) {
                throw new \RuntimeException('The encoded data contains the padding characters.');
            }
            // drop padding found in the middle of the data, keep the trailing one
            $encoded = str_replace($this->padding, '', $inside).substr($encoded, strlen($inside));
        }

        // The padding is user supplied and may be a regular expression
        // metacharacter (ie: '*', '.', '/'); it must be quoted before being
        // embedded in the pattern otherwise valid input would be rejected.
        $quotedPadding = preg_quote($this->padding, '/');
        $paddingPattern = '/^[^'.$quotedPadding.']+(('.$quotedPadding.'){3,4}|('.$quotedPadding.'){6}|'.$quotedPadding.')?$/';
        if ($strict && 1 !== preg_match($paddingPattern, $encoded)) {
            throw new \RuntimeException('The encoded data contains the padding characters.');
        }

        $decoded = '';
        $len = strlen($encoded);
        $n = 0;
        $bitLen = 5;
        // each alphabet character maps to its 5-bit value, the padding maps to 0
        $mapping = array_combine(str_split($characters), [...range(0, 31), 0]);
        $val = $mapping[$encoded[0]];

        while ($n < $len) {
            if ($bitLen < 8) {
                // not enough bits buffered to emit a byte: read the next character
                $val = $val << 5;
                $bitLen += 5;
                $n++;
                $pentet = $encoded[$n] ?? $this->padding;
                if ($this->padding === $pentet) {
                    // the first padding character (or the end of the input) ends the decoding
                    $n = $len;
                }
                $val += $mapping[$pentet];
                continue;
            }

            $shift = $bitLen - 8;
            $decoded .= chr($val >> $shift);
            $val = $val & ((1 << $shift) - 1);
            $bitLen -= 8;
        }

        return $decoded;
    }

    /**
     * Encodes a string into its padded base32 representation
     * using the instance alphabet and padding character.
     */
    public function encode(string $decoded): string
    {
        if ('' === $decoded) {
            return '';
        }

        $encoded = '';
        $n = 0;
        $bitLen = 0;
        $val = 0;
        $len = strlen($decoded);
        // extra zero bytes let the loop read past the input while completing the last block
        $decoded .= str_repeat(chr(0), 4);
        // unpack returns a 1-indexed array of byte values
        $chars = (array) unpack('C*', $decoded);
        $alphabet = $this->alphabet.$this->padding;

        while ($n < $len || 0 !== $bitLen) {
            if ($bitLen < 5) {
                $val = $val << 8;
                $bitLen += 8;
                $n++;
                $val += $chars[$n];
            }
            $shift = $bitLen - 5;
            // once all the input bits have been consumed the remaining positions are padded
            $encoded .= ($n - (int) ($bitLen > 8) > $len && 0 === $val) ? $this->padding : $alphabet[$val >> $shift];
            $val = $val & ((1 << $shift) - 1);
            $bitLen -= 5;
        }

        return $encoded;
    }
}
6 changes: 3 additions & 3 deletions src/Base32/Base32Test.php
Original file line number Diff line number Diff line change
Expand Up @@ -51,15 +51,15 @@ public function it_will_base32_decode_on_hex_mode(string $decoded, string $encod
#[Test]
public function it_will_base32_encode_and_decode(string $string): void
{
    // base32_encode() returns string|false: the explicit cast keeps the value
    // compatible with the string parameter expected by base32_decode().
    self::assertSame($string, base32_decode((string) base32_encode($string)));
    self::assertSame($string, base32_decode((string) base32_encode($string, PHP_BASE32_HEX), PHP_BASE32_HEX));
}

#[DataProvider('invalidDecodingSequence')]
#[Test]
public function it_will_return_false_from_invalid_encoded_string_with_base32_decode_function(string $sequence, string $message, string $encoding): void
{
    // The third argument is the padding character; strict mode is the fourth one.
    self::assertFalse(base32_decode($sequence, $encoding, '=', true));
}

/**
Expand Down
30 changes: 16 additions & 14 deletions src/Base32/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,25 +25,27 @@ You need:
The package provides a userland base32 encoding and decoding mechanism.

```php
base32_encode(string $string, string $alphabet = PHP_BASE32_ASCII): string
base32_decode(string $string, string $alphabet = PHP_BASE32_ASCII, bool $strict = false): string
base32_encode(string $decoded, string $alphabet = PHP_BASE32_ASCII, string $padding = '='): string|false
base32_decode(string $encoded, string $alphabet = PHP_BASE32_ASCII, string $padding = '=', bool $strict = false): string|false
```

#### Parameters:

- `$string` : the data to encode for `base32_encode` or to decode for `base32_decode`
- `$alphabet` : the base32 alphabet, by default `PHP_BASE_ASCII`.
- `$decoded` : the data to encode for `base32_encode`
- `$encoded` : the data to decode for `base32_decode`
- `$alphabet` : the base32 alphabet, by default `PHP_BASE32_ASCII`.
- `$padding` : the padding character

If `$alphabet` is `PHP_BASE_ASCII`, conversion is performed per RFC4648 US-ASCII standard.
If `$alphabet` is `PHP_BASE_HEXC`, conversion is performed per RFC4648 HEX standard.
If `$alphabet` is `PHP_BASE32_ASCII` and the `$padding` is `=`, conversion is performed per RFC4648 US-ASCII standard.
If `$alphabet` is `PHP_BASE32_HEX` and the `$padding` is `=`, conversion is performed per RFC4648 HEX standard.

**You can provide your own alphabet of 32 characters.**
**You can provide your own alphabet of 32 characters and your own padding character.**

- `$strict` : tell whether we need to perform strict decoding or not

If the strict parameter is set to `true` then the base32_decode() function will return `false`

- if encoded sequence lenght is invalid
- if encoded sequence length is invalid
- if the input contains characters from outside the base32 alphabet.
- if padding is invalid
- if encoded characters are not all uppercased
Expand All @@ -53,10 +55,10 @@ otherwise listed constraints are silently ignored or discarded.
```php
<?php

base32_encode('Bangui'); // returns 'IJQW4Z3VNE======'
base32_decode('IJQW4Z3VNE======'); // returns 'Bangui'
base32_decode('IJQW4Z083VNE======'); // returns 'Bangui'
base32_decode('IJQW4Z083VNE======', PHP_BASE32_ASCII, true); // return false
base32_encode('Bangui', PHP_BASE32_HEX); // returns '89GMSPRLD4======'
base32_decode('89GMSPRLD4======', PHP_BASE32_HEX, true); // returns 'Bangui'
base32_encode('Bangui'); // returns 'IJQW4Z3VNE======'
base32_decode('IJQW4Z3VNE======'); // returns 'Bangui'
base32_decode('IJQW4Z083VNE======'); // returns 'Bangui'
base32_decode('IJQW4Z083VNE======', PHP_BASE32_ASCII, '=', true); // returns false
base32_encode('Bangui', PHP_BASE32_HEX, '*'); // returns '89GMSPRLD4******'
base32_decode('89GMSPRLD4******', PHP_BASE32_HEX, '*', true); // returns 'Bangui'
```
109 changes: 12 additions & 97 deletions src/Base32/functions.php
Original file line number Diff line number Diff line change
Expand Up @@ -2,121 +2,36 @@

declare(strict_types=1);

use Bakame\Aide\Base32\Base32;

// RFC 4648 base32 alphabet; only declared when no other code has defined it already.
if (!defined('PHP_BASE32_ASCII')) {
    define('PHP_BASE32_ASCII', 'ABCDEFGHIJKLMNOPQRSTUVWXYZ234567');
}

// RFC 4648 base32 "extended hex" alphabet; only declared when no other code has defined it already.
if (!defined('PHP_BASE32_HEX')) {
    define('PHP_BASE32_HEX', '0123456789ABCDEFGHIJKLMNOPQRSTUV');
}

if (!function_exists('base32_encode')) {
    /**
     * Encodes a string in base32 using the given alphabet and padding character.
     *
     * The work is delegated to the Base32 class.
     *
     * @param string $decoded  the data to encode
     * @param string $alphabet the 32-character alphabet to use
     * @param string $padding  the single-byte padding character
     *
     * @return string|false the encoded data, or false if the Base32 class
     *                      throws a RuntimeException (ie: the alphabet or the padding is rejected)
     */
    function base32_encode(
        string $decoded,
        string $alphabet = PHP_BASE32_ASCII,
        string $padding = '=',
    ): string|false {
        try {
            return Base32::new($alphabet, $padding)->encode($decoded);
        } catch (RuntimeException) {
            return false;
        }
    }
}

if (!function_exists('base32_decode')) {
    /**
     * Decodes a base32 encoded string using the given alphabet and padding character.
     *
     * The work is delegated to the Base32 class.
     *
     * @param string $encoded  the data to decode
     * @param string $alphabet the 32-character alphabet to use
     * @param string $padding  the single-byte padding character
     * @param bool   $strict   whether malformed input must be rejected instead of being cleaned up
     *
     * @return string|false the decoded data, or false if the Base32 class
     *                      throws a RuntimeException (ie: invalid alphabet or padding,
     *                      or malformed input in strict mode)
     */
    function base32_decode(
        string $encoded,
        string $alphabet = PHP_BASE32_ASCII,
        string $padding = '=',
        bool $strict = false
    ): string|false {
        try {
            return Base32::new($alphabet, $padding)->decode($encoded, $strict);
        } catch (RuntimeException) {
            return false;
        }
    }
}

0 comments on commit c5157e3

Please sign in to comment.