Skip to content

Commit

Permalink
Fix decoding non-ascii attachment names from TNEF attachments (#5646, #…
Browse files Browse the repository at this point in the history
  • Loading branch information
alecpl committed Jun 27, 2017
1 parent a80f7c5 commit b4c877b
Show file tree
Hide file tree
Showing 3 changed files with 209 additions and 41 deletions.
1 change: 1 addition & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ CHANGELOG Roundcube Webmail
===========================

- Password: Fix compatibility with PHP 7+ in cpanel_webmail driver (#5820)
- Fix decoding non-ascii attachment names from TNEF attachments (#5646, #5799)

RELEASE 1.3.0
-------------
Expand Down
105 changes: 105 additions & 0 deletions program/lib/Roundcube/rcube_charset.php
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,111 @@ class rcube_charset
'WINDOWS949' => 'UHC',
);

/**
 * Windows codepages
 *
 * Maps Windows code page identifiers (integer keys) to charset names
 * that can be handed to rcube_charset::convert().
 *
 * Code pages 1200 and 12000 are little-endian by definition, so they are
 * mapped to the explicit "LE" charset names. The bare 'UTF-16'/'UTF-32'
 * names rely on a BOM and are treated as big-endian by some converters
 * when no BOM is present.
 *
 * @var array
 */
static public $windows_codepages = array(
    37 => 'IBM037', // IBM EBCDIC US-Canada
    437 => 'IBM437', // OEM United States
    500 => 'IBM500', // IBM EBCDIC International
    708 => 'ASMO-708', // Arabic (ASMO 708)
    720 => 'DOS-720', // Arabic (Transparent ASMO); Arabic (DOS)
    737 => 'IBM737', // OEM Greek (formerly 437G); Greek (DOS)
    775 => 'IBM775', // OEM Baltic; Baltic (DOS)
    850 => 'IBM850', // OEM Multilingual Latin 1; Western European (DOS)
    852 => 'IBM852', // OEM Latin 2; Central European (DOS)
    855 => 'IBM855', // OEM Cyrillic (primarily Russian)
    857 => 'IBM857', // OEM Turkish; Turkish (DOS)
    858 => 'IBM00858', // OEM Multilingual Latin 1 + Euro symbol
    860 => 'IBM860', // OEM Portuguese; Portuguese (DOS)
    861 => 'IBM861', // OEM Icelandic; Icelandic (DOS)
    862 => 'DOS-862', // OEM Hebrew; Hebrew (DOS)
    863 => 'IBM863', // OEM French Canadian; French Canadian (DOS)
    864 => 'IBM864', // OEM Arabic; Arabic (864)
    865 => 'IBM865', // OEM Nordic; Nordic (DOS)
    866 => 'cp866', // OEM Russian; Cyrillic (DOS)
    869 => 'IBM869', // OEM Modern Greek; Greek, Modern (DOS)
    870 => 'IBM870', // IBM EBCDIC Multilingual/ROECE (Latin 2); IBM EBCDIC Multilingual Latin 2
    874 => 'windows-874', // ANSI/OEM Thai (ISO 8859-11); Thai (Windows)
    875 => 'cp875', // IBM EBCDIC Greek Modern
    932 => 'shift_jis', // ANSI/OEM Japanese; Japanese (Shift-JIS)
    936 => 'gb2312', // ANSI/OEM Simplified Chinese (PRC, Singapore); Chinese Simplified (GB2312)
    950 => 'big5', // ANSI/OEM Traditional Chinese (Taiwan; Hong Kong SAR, PRC); Chinese Traditional (Big5)
    1026 => 'IBM1026', // IBM EBCDIC Turkish (Latin 5)
    1047 => 'IBM01047', // IBM EBCDIC Latin 1/Open System
    1140 => 'IBM01140', // IBM EBCDIC US-Canada (037 + Euro symbol); IBM EBCDIC (US-Canada-Euro)
    1141 => 'IBM01141', // IBM EBCDIC Germany (20273 + Euro symbol); IBM EBCDIC (Germany-Euro)
    1142 => 'IBM01142', // IBM EBCDIC Denmark-Norway (20277 + Euro symbol); IBM EBCDIC (Denmark-Norway-Euro)
    1143 => 'IBM01143', // IBM EBCDIC Finland-Sweden (20278 + Euro symbol); IBM EBCDIC (Finland-Sweden-Euro)
    1144 => 'IBM01144', // IBM EBCDIC Italy (20280 + Euro symbol); IBM EBCDIC (Italy-Euro)
    1145 => 'IBM01145', // IBM EBCDIC Latin America-Spain (20284 + Euro symbol); IBM EBCDIC (Spain-Euro)
    1146 => 'IBM01146', // IBM EBCDIC United Kingdom (20285 + Euro symbol); IBM EBCDIC (UK-Euro)
    1147 => 'IBM01147', // IBM EBCDIC France (20297 + Euro symbol); IBM EBCDIC (France-Euro)
    1148 => 'IBM01148', // IBM EBCDIC International (500 + Euro symbol); IBM EBCDIC (International-Euro)
    1149 => 'IBM01149', // IBM EBCDIC Icelandic (20871 + Euro symbol); IBM EBCDIC (Icelandic-Euro)
    1200 => 'UTF-16LE', // Unicode UTF-16, little endian byte order (BMP of ISO 10646); available only to managed applications
    1201 => 'UTF-16BE', // Unicode UTF-16, big endian byte order; available only to managed applications
    1250 => 'windows-1250', // ANSI Central European; Central European (Windows)
    1251 => 'windows-1251', // ANSI Cyrillic; Cyrillic (Windows)
    1252 => 'windows-1252', // ANSI Latin 1; Western European (Windows)
    1253 => 'windows-1253', // ANSI Greek; Greek (Windows)
    1254 => 'windows-1254', // ANSI Turkish; Turkish (Windows)
    1255 => 'windows-1255', // ANSI Hebrew; Hebrew (Windows)
    1256 => 'windows-1256', // ANSI Arabic; Arabic (Windows)
    1257 => 'windows-1257', // ANSI Baltic; Baltic (Windows)
    1258 => 'windows-1258', // ANSI/OEM Vietnamese; Vietnamese (Windows)
    10000 => 'macintosh', // MAC Roman; Western European (Mac)
    12000 => 'UTF-32LE', // Unicode UTF-32, little endian byte order; available only to managed applications
    12001 => 'UTF-32BE', // Unicode UTF-32, big endian byte order; available only to managed applications
    20127 => 'US-ASCII', // US-ASCII (7-bit)
    20273 => 'IBM273', // IBM EBCDIC Germany
    20277 => 'IBM277', // IBM EBCDIC Denmark-Norway
    20278 => 'IBM278', // IBM EBCDIC Finland-Sweden
    20280 => 'IBM280', // IBM EBCDIC Italy
    20284 => 'IBM284', // IBM EBCDIC Latin America-Spain
    20285 => 'IBM285', // IBM EBCDIC United Kingdom
    20290 => 'IBM290', // IBM EBCDIC Japanese Katakana Extended
    20297 => 'IBM297', // IBM EBCDIC France
    20420 => 'IBM420', // IBM EBCDIC Arabic
    20423 => 'IBM423', // IBM EBCDIC Greek
    20424 => 'IBM424', // IBM EBCDIC Hebrew
    20838 => 'IBM-Thai', // IBM EBCDIC Thai
    20866 => 'koi8-r', // Russian (KOI8-R); Cyrillic (KOI8-R)
    20871 => 'IBM871', // IBM EBCDIC Icelandic
    20880 => 'IBM880', // IBM EBCDIC Cyrillic Russian
    20905 => 'IBM905', // IBM EBCDIC Turkish
    20924 => 'IBM00924', // IBM EBCDIC Latin 1/Open System (1047 + Euro symbol)
    20932 => 'EUC-JP', // Japanese (JIS 0208-1990 and 0212-1990)
    20936 => 'cp20936', // Simplified Chinese (GB2312); Chinese Simplified (GB2312-80)
    20949 => 'cp20949', // Korean Wansung
    21025 => 'cp1025', // IBM EBCDIC Cyrillic Serbian-Bulgarian
    21866 => 'koi8-u', // Ukrainian (KOI8-U); Cyrillic (KOI8-U)
    28591 => 'iso-8859-1', // ISO 8859-1 Latin 1; Western European (ISO)
    28592 => 'iso-8859-2', // ISO 8859-2 Central European; Central European (ISO)
    28593 => 'iso-8859-3', // ISO 8859-3 Latin 3
    28594 => 'iso-8859-4', // ISO 8859-4 Baltic
    28595 => 'iso-8859-5', // ISO 8859-5 Cyrillic
    28596 => 'iso-8859-6', // ISO 8859-6 Arabic
    28597 => 'iso-8859-7', // ISO 8859-7 Greek
    28598 => 'iso-8859-8', // ISO 8859-8 Hebrew; Hebrew (ISO-Visual)
    28599 => 'iso-8859-9', // ISO 8859-9 Turkish
    28603 => 'iso-8859-13', // ISO 8859-13 Estonian
    28605 => 'iso-8859-15', // ISO 8859-15 Latin 9
    38598 => 'iso-8859-8-i', // ISO 8859-8 Hebrew; Hebrew (ISO-Logical)
    50220 => 'iso-2022-jp', // ISO 2022 Japanese with no halfwidth Katakana; Japanese (JIS)
    50221 => 'csISO2022JP', // ISO 2022 Japanese with halfwidth Katakana; Japanese (JIS-Allow 1 byte Kana)
    50222 => 'iso-2022-jp', // ISO 2022 Japanese JIS X 0201-1989; Japanese (JIS-Allow 1 byte Kana - SO/SI)
    50225 => 'iso-2022-kr', // ISO 2022 Korean
    51932 => 'EUC-JP', // EUC Japanese
    51936 => 'EUC-CN', // EUC Simplified Chinese; Chinese Simplified (EUC)
    51949 => 'EUC-KR', // EUC Korean
    52936 => 'hz-gb-2312', // HZ-GB2312 Simplified Chinese; Chinese Simplified (HZ)
    54936 => 'GB18030', // Windows XP and later: GB18030 Simplified Chinese (4 byte); Chinese Simplified (GB18030)
    65000 => 'UTF-7',
    65001 => 'UTF-8',
);

/**
* Catch an error and throw an exception.
Expand Down
144 changes: 103 additions & 41 deletions program/lib/Roundcube/rcube_tnef_decoder.php
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
/**
+-----------------------------------------------------------------------+
| This file is part of the Roundcube Webmail client |
| Copyright (C) 2008-2014, The Roundcube Dev Team |
| Copyright (C) 2008-2017, The Roundcube Dev Team |
| Copyright (C) 2002-2010, The Horde Project (http://www.horde.org/) |
| |
| Licensed under the GNU General Public License version 3 or |
Expand All @@ -15,6 +15,7 @@
+-----------------------------------------------------------------------+
| Author: Jan Schneider <[email protected]> |
| Author: Michael Slusarz <[email protected]> |
| Author: Aleksander Machniak <[email protected]> |
+-----------------------------------------------------------------------+
*/

Expand All @@ -29,41 +30,70 @@
*/
class rcube_tnef_decoder
{
const SIGNATURE = 0x223e9f78;
const LVL_MESSAGE = 0x01;
const LVL_ATTACHMENT = 0x02;

const ASUBJECT = 0x88004;
const AMCLASS = 0x78008;
const ATTACHDATA = 0x6800f;
const AFILENAME = 0x18010;
const ARENDDATA = 0x69002;
const AMAPIATTRS = 0x69005;
const AVERSION = 0x89006;

const MAPI_NULL = 0x0001;
const MAPI_SHORT = 0x0002;
const MAPI_INT = 0x0003;
const MAPI_FLOAT = 0x0004;
const MAPI_DOUBLE = 0x0005;
const SIGNATURE = 0x223e9f78;
const LVL_MESSAGE = 0x01;
const LVL_ATTACHMENT = 0x02;

const ASUBJECT = 0x88004;
const AMCLASS = 0x78008;
const ATTACHDATA = 0x6800f;
const AFILENAME = 0x18010;
const ARENDDATA = 0x69002;
const AMAPIATTRS = 0x69005;
const AOEMCODEPAGE = 0x69007;
const AVERSION = 0x89006;

const MAPI_NULL = 0x0001;
const MAPI_SHORT = 0x0002;
const MAPI_INT = 0x0003;
const MAPI_FLOAT = 0x0004;
const MAPI_DOUBLE = 0x0005;
const MAPI_CURRENCY = 0x0006;
const MAPI_APPTIME = 0x0007;
const MAPI_ERROR = 0x000a;
const MAPI_BOOLEAN = 0x000b;
const MAPI_OBJECT = 0x000d;
const MAPI_APPTIME = 0x0007;
const MAPI_ERROR = 0x000a;
const MAPI_BOOLEAN = 0x000b;
const MAPI_OBJECT = 0x000d;
const MAPI_INT8BYTE = 0x0014;
const MAPI_STRING = 0x001e;
const MAPI_STRING = 0x001e;
const MAPI_UNICODE_STRING = 0x001f;
const MAPI_SYSTIME = 0x0040;
const MAPI_CLSID = 0x0048;
const MAPI_BINARY = 0x0102;

const MAPI_ATTACH_LONG_FILENAME = 0x3707;
const MAPI_ATTACH_MIME_TAG = 0x370E;

const MAPI_NAMED_TYPE_ID = 0x0000;
const MAPI_NAMED_TYPE_STRING = 0x0001;
const MAPI_MV_FLAG = 0x1000;
const MAPI_SYSTIME = 0x0040;
const MAPI_CLSID = 0x0048;
const MAPI_BINARY = 0x0102;

const MAPI_DISPLAY_NAME = 0x3001;
const MAPI_ADDRTYPE = 0x3002;
const MAPI_EMAIL_ADDRESS = 0x3003;
const MAPI_COMMENT = 0x3004;
const MAPI_DEPTH = 0x3005;
const MAPI_PROVIDER_DISPLAY = 0x3006;
const MAPI_CREATION_TIME = 0x3007;
const MAPI_LAST_MODIFICATION_TIME = 0x3008;
const MAPI_RESOURCE_FLAGS = 0x3009;
const MAPI_PROVIDER_DLL_NAME = 0x300A;
const MAPI_SEARCH_KEY = 0x300B;
const MAPI_ATTACHMENT_X400_PARAMETERS = 0x3700;
const MAPI_ATTACH_DATA_OBJ = 0x3701;
const MAPI_ATTACH_ENCODING = 0x3702;
const MAPI_ATTACH_EXTENSION = 0x3703;
const MAPI_ATTACH_FILENAME = 0x3704;
const MAPI_ATTACH_METHOD = 0x3705;
const MAPI_ATTACH_LONG_FILENAME = 0x3707;
const MAPI_ATTACH_PATHNAME = 0x3708;
const MAPI_ATTACH_RENDERING = 0x3709;
const MAPI_ATTACH_TAG = 0x370A;
const MAPI_RENDERING_POSITION = 0x370B;
const MAPI_ATTACH_TRANSPORT_NAME = 0x370C;
const MAPI_ATTACH_LONG_PATHNAME = 0x370D;
const MAPI_ATTACH_MIME_TAG = 0x370E;
const MAPI_ATTACH_ADDITIONAL_INFO = 0x370F;
const MAPI_ATTACH_MIME_SEQUENCE = 0x3710;
const MAPI_ATTACH_CONTENT_ID = 0x3712;
const MAPI_ATTACH_CONTENT_LOCATION = 0x3713;
const MAPI_ATTACH_FLAGS = 0x3714;

const MAPI_NAMED_TYPE_ID = 0x0000;
const MAPI_NAMED_TYPE_STRING = 0x0001;
const MAPI_MV_FLAG = 0x1000;

/**
* Decompress the data.
Expand Down Expand Up @@ -153,10 +183,12 @@ protected function _geti(&$data, $bits)
protected function _decodeAttribute(&$data, $attribute)
{
/* Data. */
$this->_getx($data, $this->_geti($data, 32));
$value = $this->_getx($data, $this->_geti($data, 32));

/* Checksum. */
$this->_geti($data, 16);

return $value;
}

/**
Expand Down Expand Up @@ -254,13 +286,13 @@ protected function _extractMapiAttributes($data, &$attachment_data)
/* Store any interesting attributes. */
switch ($attr_name) {
case self::MAPI_ATTACH_LONG_FILENAME:
$value = str_replace("\0", '', $value);
$value = $this->convertString($value);
/* Used in preference to AFILENAME value. */
$attachment_data[0]['name'] = preg_replace('/.*[\/](.*)$/', '\1', $value);
break;

case self::MAPI_ATTACH_MIME_TAG:
$value = str_replace("\0", '', $value);
$value = $this->convertString($value);
/* Is this ever set, and what is format? */
$attachment_data[0]['type'] = preg_replace('/^(.*)\/.*/', '\1', $value);
$attachment_data[0]['subtype'] = preg_replace('/.*\/(.*)$/', '\1', $value);
Expand All @@ -276,7 +308,18 @@ protected function _extractMapiAttributes($data, &$attachment_data)
*/
protected function _decodeMessage(&$data)
{
$this->_decodeAttribute($data, $this->_geti($data, 32));
$attribute = $this->_geti($data, 32);
$value = $this->_decodeAttribute($data, $attribute);

switch ($attribute) {
case self::AOEMCODEPAGE:
// Find codepage of the message
$value = unpack('V', $value);
$this->codepage = $value[1];
break;

default:
}
}

/**
Expand Down Expand Up @@ -307,7 +350,8 @@ protected function _decodeAttachment(&$data, &$attachment_data)

case self::AFILENAME:
$value = $this->_getx($data, $this->_geti($data, 32));
$value = str_replace("\0", '', $value);
$value = $this->convertString($value, true);

/* Strip path. */
$attachment_data[0]['name'] = preg_replace('/.*[\/](.*)$/', '\1', $value);

Expand All @@ -318,16 +362,15 @@ protected function _decodeAttachment(&$data, &$attachment_data)
case self::ATTACHDATA:
/* The attachment itself. */
$length = $this->_geti($data, 32);
$attachment_data[0]['size'] = $length;
$attachment_data[0]['size'] = $length;
$attachment_data[0]['stream'] = $this->_getx($data, $length);

/* Checksum */
$this->_geti($data, 16);
break;

case self::AMAPIATTRS:
$length = $this->_geti($data, 32);
$value = $this->_getx($data, $length);
$value = $this->_getx($data, $this->_geti($data, 32));

/* Checksum */
$this->_geti($data, 16);
Expand All @@ -338,4 +381,23 @@ protected function _decodeAttachment(&$data, &$attachment_data)
$this->_decodeAttribute($data, $attribute);
}
}

/**
 * Convert string value to system charset according to defined codepage
 *
 * @param string $str          Raw attribute value as read from the TNEF stream
 *                             (may carry trailing NUL terminators)
 * @param bool   $use_codepage When true, and a codepage was set on this object
 *                             that is listed in rcube_charset::$windows_codepages,
 *                             the value is converted from that charset.
 *                             Otherwise a value containing an embedded NUL byte
 *                             is treated as UTF-16LE.
 *
 * @return string Converted string with trailing NUL bytes removed
 */
protected function convertString($str, $use_codepage = false)
{
    $charset = null;

    // Look the codepage up defensively: an unknown identifier must not
    // raise an undefined-index notice, it just means "no charset known".
    if ($use_codepage && $this->codepage
        && !empty(rcube_charset::$windows_codepages[$this->codepage])
    ) {
        $charset = rcube_charset::$windows_codepages[$this->codepage];
    }

    if ($charset) {
        $str = rcube_charset::convert($str, $charset);
    }
    else if (($pos = strpos($str, "\0")) !== false && $pos != strlen($str) - 1) {
        // A NUL byte anywhere but the very last position means wide characters.
        // A lone trailing NUL is only the terminator of an 8-bit string.
        $str = rcube_charset::convert($str, 'UTF-16LE');
    }

    // Strip terminators only after conversion. Trimming first would also
    // remove the high (zero) byte of the last UTF-16LE code unit and
    // corrupt or hide the wide-character input.
    return rtrim($str, "\0");
}
}

0 comments on commit b4c877b

Please sign in to comment.