Skip to content

Commit

Permalink
Improve base32 package implementation by removing regexp usage
Browse files Browse the repository at this point in the history
  • Loading branch information
nyamsprod committed Mar 22, 2024
1 parent 4ab2451 commit 623e78e
Showing 1 changed file with 33 additions and 29 deletions.
62 changes: 33 additions & 29 deletions src/Base32/Base32.php
Original file line number Diff line number Diff line change
Expand Up @@ -22,16 +22,16 @@ final class Base32
/**
 * Validates the alphabet and the padding character, then stores them on the instance.
 *
 * The checks are evaluated top to bottom and the first one that fails throws,
 * so the padding is validated before the alphabet. The padding/alphabet
 * collision check and the uniqueness check are done on upper-cased copies,
 * which makes them case-insensitive; the values stored on the instance are
 * the ones received, not the upper-cased copies.
 *
 * @param string $alphabet the encoding alphabet; must be self::ALPHABET_SIZE bytes long
 *                         (32 according to the error message below)
 * @param string $padding  the padding character; must be exactly one byte
 *
 * @throws ValueError if the padding or the alphabet does not satisfy the constraints
 */
private function __construct(string $alphabet, string $padding)
{
    // Upper-cased copy used only for the case-insensitive checks below.
    $normalizeAlphabet = strtoupper($alphabet);

    [$this->alphabet, $this->padding] = match (true) {
        1 !== strlen($padding) => throw new ValueError('The padding character must be a single character.'),
        "\r" === $padding => throw new ValueError('The padding character can not be the carriage return character.'),
        "\n" === $padding => throw new ValueError('The padding character can not be the newline escape sequence.'),
        ' ' === $padding => throw new ValueError('The padding character can not be the space character.'),
        self::ALPHABET_SIZE !== strlen($alphabet) => throw new ValueError('The alphabet must be a 32 bytes long string.'),
        str_contains($alphabet, "\r") => throw new ValueError('The alphabet can not contain the carriage return character.'),
        str_contains($alphabet, "\n") => throw new ValueError('The alphabet can not contain the newline escape sequence.'),
        str_contains($normalizeAlphabet, strtoupper($padding)) => throw new ValueError('The alphabet can not contain the padding character.'),
        // count_chars() in mode 3 returns a string made of every distinct byte
        // used, so its length is the number of unique characters.
        self::ALPHABET_SIZE !== strlen(count_chars($normalizeAlphabet, 3)) => throw new ValueError('The alphabet must contain unique characters.'), /* @phpstan-ignore-line */
        default => [$alphabet, $padding],
    };
}
Expand All @@ -52,60 +52,64 @@ public function decode(string $encoded, bool $strict = false): string
}

$alphabet = $this->alphabet;
$encoded = str_replace(["\r", "\n"], [''], $encoded);
$padding = $this->padding;
$encoded = str_replace(["\r", "\n", ' '], [''], $encoded);
if (!$strict) {
$alphabet = strtoupper($alphabet);
$encoded = str_replace(strtoupper($this->padding), $this->padding, strtoupper($encoded));
$padding = strtoupper($padding);
$encoded = strtoupper($encoded);
}

$remainder = strlen($encoded) % 8;
if (0 !== $remainder) {
if ($strict) {
$encoded .= !$strict ?
str_repeat($padding, $remainder) :
throw new RuntimeException('The encoded data length is invalid.');
}

$encoded .= str_repeat($this->padding, $remainder);
}

$characters = $alphabet.$this->padding;
if (strspn($encoded, $characters) !== strlen($encoded)) {
if ($strict) {
throw new RuntimeException('The encoded data contains characters unknown to the alphabet.');
}
$encoded = preg_replace('/[^'.preg_quote($characters, '/').']/', '', $encoded);
if ('' === $encoded || null === $encoded) {
return '';
}
$inside = rtrim($encoded, $padding);
$end = substr($encoded, strlen($inside));
if ($strict && !in_array(strlen($end), [0, 1, 3, 4, 6], true)) {
throw new RuntimeException('The encoded data ends with an invalid padding sequence length.');
}

$inside = rtrim($encoded, $this->padding);
if (str_contains($inside, $this->padding)) {
if ($strict) {
throw new RuntimeException('The encoded data contains the padding character.');
}
$encoded = str_replace($this->padding, '', $inside).substr($encoded, strlen($inside));
if (str_contains($inside, $padding)) {
$encoded = !$strict ?
str_replace($padding, '', $inside).$end :
throw new RuntimeException('The padding character is used inside the encoded data in an invalid place.');
}

if ($strict && 1 !== preg_match('/^[^'.$this->padding.']+(('.$this->padding.'){3,4}|('.$this->padding.'){6}|'.$this->padding.')?$/', $encoded)) {
throw new RuntimeException('The encoded data contains the padding character.');
$characters = $alphabet.$padding;
if ($strict && (strspn($encoded, $characters) !== strlen($encoded))) {
throw new RuntimeException('The encoded data contains characters unknown to the base32 alphabet.');
}

$decoded = '';
$offset = 0;
$bitLen = 5;
$length = strlen($encoded);
$chars = array_combine(str_split($characters), [...range(0, 31), 0]);
$val = $chars[$encoded[0]];
$val = $chars[$encoded[$offset]] ?? -1;

while ($offset < $length) {
if (!$strict && -1 === $val) {
$offset++;
if ($offset >= $length) {
break;
}
$val = $chars[$encoded[$offset]] ?? -1;
continue;
}

if ($bitLen < 8) {
$bitLen += 5;
$offset++;
$pentet = $encoded[$offset] ?? $this->padding;
if ($this->padding === $pentet) {
$pentet = $encoded[$offset] ?? $padding;
if ($padding === $pentet) {
$offset = $length;
}
$val = ($val << 5) + $chars[$pentet];

$val = ($val << 5) + ($chars[$pentet] ?? 0);
continue;
}

Expand Down

0 comments on commit 623e78e

Please sign in to comment.