From fac906aeb4493eb6508486c507a23487774578c0 Mon Sep 17 00:00:00 2001 From: Anna Larch Date: Mon, 20 Dec 2021 16:17:26 +0100 Subject: [PATCH] Change how charsets are detected for emails Signed-off-by: Anna Larch --- lib/Model/IMAPMessage.php | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/lib/Model/IMAPMessage.php b/lib/Model/IMAPMessage.php index 0b3c38f505..e0f3740648 100644 --- a/lib/Model/IMAPMessage.php +++ b/lib/Model/IMAPMessage.php @@ -48,10 +48,12 @@ use OCA\Mail\Db\MailAccount; use OCA\Mail\Db\Message; use OCA\Mail\Db\Tag; +use OCA\Mail\Exception\ServiceException; use OCA\Mail\Service\Html; use OCP\AppFramework\Db\DoesNotExistException; use OCP\Files\File; use OCP\Files\SimpleFS\ISimpleFile; +use ValueError; use function in_array; use function mb_convert_encoding; use function mb_strcut; @@ -588,6 +590,7 @@ private function handleHtmlMessage(Horde_Mime_Part $p, $partNo): void { * @return string * @throws DoesNotExistException * @throws Exception + * @throws ServiceException */ private function loadBodyData(Horde_Mime_Part $p, $partNo): string { // DECODE DATA @@ -619,7 +622,22 @@ private function loadBodyData(Horde_Mime_Part $p, $partNo): string { $p->setContents($data); $data = $p->getContents(); - $data = mb_convert_encoding($data, 'UTF-8', $p->getCharset()); + $charset = mb_detect_encoding($data, 'UTF-8', true); + + if (!$charset) { + $charset = $p->getCharset(); + } + + try { + $data = @mb_convert_encoding($data, 'UTF-8', $charset); + } catch (ValueError $e) { + throw new ServiceException('Could not detect charset for message ' . $e->getMessage(), $e->getCode()); + } + + if (!$data) { + throw new ServiceException('Could not detect charset for message'); + } + return $data; }