-
Notifications
You must be signed in to change notification settings - Fork 263
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
fixup! fix(encoding): better character encoding
- Loading branch information
1 parent
b557369
commit 4814b04
Showing
3 changed files
with
180 additions
and
32 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
<?php | ||
/* | ||
* * | ||
* * | ||
* * @copyright 2023 Anna Larch <[email protected]> | ||
* * | ||
* * @author Anna Larch <[email protected]> | ||
* * | ||
* * This library is free software; you can redistribute it and/or | ||
* * modify it under the terms of the GNU AFFERO GENERAL PUBLIC LICENSE | ||
* * License as published by the Free Software Foundation; either | ||
* * version 3 of the License, or any later version. | ||
* * | ||
* * This library is distributed in the hope that it will be useful, | ||
* * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
* * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
* * GNU AFFERO GENERAL PUBLIC LICENSE for more details. | ||
* * | ||
* * You should have received a copy of the GNU Affero General Public | ||
* * License along with this library. If not, see <http://www.gnu.org/licenses/>. | ||
* * | ||
* | ||
*/ | ||
|
||
namespace OCA\Mail\IMAP\Charset; | ||
|
||
use Horde_Mime_Part; | ||
use OCA\Mail\Exception\ServiceException; | ||
|
||
class Converter { | ||
|
||
/** | ||
* @param Horde_Mime_Part $p | ||
* @return string | ||
* @throws ServiceException | ||
*/ | ||
public function convert(Horde_Mime_Part $p): string { | ||
$data = $p->getContents(); | ||
if ($data === null) { | ||
return ''; | ||
} | ||
|
||
// Only convert encoding if it is explicitly specified in the header because text/calendar | ||
// data is utf-8 by default. | ||
$charset = $p->getCharset(); | ||
if($charset !== null && strtoupper($charset) === 'UTF-8') { | ||
return $data; | ||
} | ||
|
||
// No charset specified | ||
$charset = mb_detect_encoding($data, 'UTF-8', true); | ||
if (!$charset) { | ||
// Fallback | ||
$charset = mb_detect_encoding($data, null, true); | ||
} | ||
|
||
// Still UTF8, no need to convert | ||
if($charset !== null && strtoupper($charset) === 'UTF-8') { | ||
Check failure on line 58 in lib/IMAP/Charset/Converter.php GitHub Actions / Nextcloud dev-masterRedundantCondition
Check failure on line 58 in lib/IMAP/Charset/Converter.php GitHub Actions / Nextcloud dev-stable27RedundantCondition
|
||
return $data; | ||
} | ||
|
||
$converted = @mb_convert_encoding($data, 'UTF-8', $charset); | ||
if (!$converted) { | ||
// Might be a charset that PHP mb doesn't know how to handle, fall back to iconv | ||
$converted = iconv($charset, 'UTF-8', $data); | ||
} | ||
|
||
if ($converted === false) { | ||
throw new ServiceException('Could not detect message charset'); | ||
} | ||
return $converted; | ||
Check failure on line 71 in lib/IMAP/Charset/Converter.php GitHub Actions / Nextcloud dev-masterInvalidReturnStatement
Check failure on line 71 in lib/IMAP/Charset/Converter.php GitHub Actions / Nextcloud dev-stable27InvalidReturnStatement
Check failure on line 71 in lib/IMAP/Charset/Converter.php GitHub Actions / Nextcloud dev-stable26InvalidReturnStatement
|
||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,102 @@ | ||
<?php | ||
|
||
declare(strict_types=1); | ||
|
||
/** | ||
* @copyright 2023 Anna Larch <[email protected]> | ||
* @author Anna Larch <[email protected]> | ||
* | ||
* | ||
* This code is free software: you can redistribute it and/or modify | ||
* it under the terms of the GNU Affero General Public License, version 3, | ||
* as published by the Free Software Foundation. | ||
* | ||
* This program is distributed in the hope that it will be useful, | ||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
* GNU Affero General Public License for more details. | ||
* | ||
* You should have received a copy of the GNU Affero General Public License, version 3, | ||
* along with this program. If not, see <http://www.gnu.org/licenses/> | ||
* | ||
*/ | ||
|
||
namespace OCA\Mail\Tests\Unit\IMAP\Charset; | ||
|
||
use ChristophWurst\Nextcloud\Testing\TestCase; | ||
use Horde_Imap_Client; | ||
use Horde_Imap_Client_Data_Fetch; | ||
use Horde_Imap_Client_Fetch_Query; | ||
use Horde_Imap_Client_Fetch_Results; | ||
use Horde_Imap_Client_Ids; | ||
use Horde_Imap_Client_Socket; | ||
use Horde_Mime_Part; | ||
use OCA\Mail\Db\Mailbox; | ||
use OCA\Mail\IMAP\Charset\Converter; | ||
use OCA\Mail\IMAP\ImapMessageFetcher; | ||
use OCA\Mail\IMAP\ImapMessageFetcherFactory; | ||
use OCA\Mail\IMAP\MessageMapper; | ||
use OCA\Mail\Model\IMAPMessage; | ||
use OCA\Mail\Service\SmimeService; | ||
use OCA\Mail\Support\PerformanceLoggerTask; | ||
use PHPUnit\Framework\MockObject\MockObject; | ||
use Psr\Log\LoggerInterface; | ||
use function range; | ||
|
||
class ConverterTest extends TestCase { | ||
|
||
public Converter $converter; | ||
protected function setUp(): void { | ||
parent::setUp(); | ||
|
||
$this->converter = new Converter(); | ||
} | ||
|
||
/** | ||
* @dataProvider dataProviderMimeParts | ||
*/ | ||
public function testConvert($mimePart, $expected): void { | ||
$actual = $this->converter->convert($mimePart); | ||
$this->assertEquals($expected, $actual); | ||
$isUtf8 = mb_check_encoding($actual, 'UTF-8'); | ||
$this->assertTrue($isUtf8); | ||
} | ||
|
||
public function dataProviderMimeParts(): array { | ||
// UTF8 | ||
$utfMimePart = new Horde_Mime_Part(); | ||
$utfMimePart->setCharset('UTF-8'); | ||
$utfMimePart->setContents('😊'); | ||
// Hebrew | ||
$iso88591MimePart = new Horde_Mime_Part(); | ||
$iso88591MimePart->setCharset('ISO 8859-1'); | ||
$iso88591MimePart->setContents('בה בדף לחבר ממונרכיה, בקר בגרסה ואמנות דת'); | ||
$iso88591MimePart_noCharset = new Horde_Mime_Part(); | ||
$iso88591MimePart_noCharset->setContents('בה בדף לחבר ממונרכיה, בקר בגרסה ואמנות דת'); | ||
// Japanese | ||
$iso2022jpMimePart = new Horde_Mime_Part(); | ||
$iso2022jpMimePart->setCharset('ISO-2022-JP'); | ||
$iso2022jpMimePart->setContents('外せ園査リツハワ題'); | ||
$iso2022jpMimePart_noCharset = new Horde_Mime_Part(); | ||
$iso2022jpMimePart_noCharset->setContents('外せ園査リツハワ題'); | ||
// Korean - not in mb | ||
$iso106461MimePart = new Horde_Mime_Part(); | ||
$iso106461MimePart->setCharset('ISO 10646-1'); | ||
$iso106461MimePart->setContents('언론·출판은 타인의 명'); | ||
// Arabic - not in mb | ||
$windowsMimePart = new Horde_Mime_Part(); | ||
$windowsMimePart->setCharset('Windows-1256'); | ||
$windowsMimePart->setContents('قام زهاء أوراقهم ما,'); | ||
|
||
return[ | ||
[$utfMimePart, '😊'], | ||
[$iso88591MimePart, 'בה בדף לחבר ממונרכיה, בקר בגרסה ואמנות דת'], | ||
[$iso88591MimePart_noCharset, 'בה בדף לחבר ממונרכיה, בקר בגרסה ואמנות דת'], | ||
[$iso2022jpMimePart, '外せ園査リツハワ題'], | ||
[$iso2022jpMimePart_noCharset, '外せ園査リツハワ題'], | ||
[$iso106461MimePart, '언론·출판은 타인의 명'], | ||
[$windowsMimePart, 'قام زهاء أوراقهم ما,'] | ||
]; | ||
} | ||
} |