From 11a78a8746f47b9e53494ba4d14b04bb24475aaa Mon Sep 17 00:00:00 2001 From: Roel Arents Date: Thu, 9 Jul 2015 15:46:36 +0200 Subject: [PATCH 1/4] =?UTF-8?q?replace=20unsupported=20ucs-2=20characters?= =?UTF-8?q?=20in=20a=20string=20with=20=EF=BF=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Doctrine/DBAL/Driver/PDODblib/Connection.php | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/Doctrine/DBAL/Driver/PDODblib/Connection.php b/Doctrine/DBAL/Driver/PDODblib/Connection.php index 3e004d7..c6d0dad 100644 --- a/Doctrine/DBAL/Driver/PDODblib/Connection.php +++ b/Doctrine/DBAL/Driver/PDODblib/Connection.php @@ -38,9 +38,21 @@ public function quote($value, $type = \PDO::PARAM_STR) { // Fix for a driver version terminating all values with null byte $val = rtrim($val, "\0"); + + // Freetds communicates with the server using UCS-2 (since v7.0). + // Freetds claims to convert from any given client charset to UCS-2 using iconv. Which should strip or replace unsupported chars. + // However in my experience, characters like 👍 (THUMBS UP SIGN, \u1F44D) end up in MSSQL shouting 'incorrect syntax' still. + $val = static::replaceNonUcs2Chars($val); return $val; } + + public static function replaceNonUcs2Chars($val) + { + // UCS-2 cannot represent unicode code points outside the BMP. (> 16 bits.) + // Replace those chars with the REPLACEMENT CHARACTER. + return preg_replace('/[^\x{0}-x{FFFF}]/u', "\xFFFD", $val); + } /** * @return bool PDO_DBlib transaction support From 7ce83439c601792598f0263d23133e7f4e926b8f Mon Sep 17 00:00:00 2001 From: Roel Arents Date: Fri, 10 Jul 2015 15:54:10 +0200 Subject: [PATCH 2/4] moved the non-UCS-2 character replacement to PDOStatement#bindValue --- Doctrine/DBAL/Driver/PDODblib/Connection.php | 19 ++++------ .../DBAL/Driver/PDODblib/PDOStatement.php | 36 +++++++++++++++++++ 2 files changed, 43 insertions(+), 12 deletions(-) create mode 100644 Doctrine/DBAL/Driver/PDODblib/PDOStatement.php diff --git a/Doctrine/DBAL/Driver/PDODblib/Connection.php b/Doctrine/DBAL/Driver/PDODblib/Connection.php index c6d0dad..21243d8 100644 --- a/Doctrine/DBAL/Driver/PDODblib/Connection.php +++ b/Doctrine/DBAL/Driver/PDODblib/Connection.php @@ -30,6 +30,13 @@ class Connection extends \Doctrine\DBAL\Driver\PDOConnection implements \Doctrin protected $_pdoTransactionsSupport = null; protected $_pdoLastInsertIdSupport = null; + + public function __construct($dsn, $user = null, $password = null, $options = null) + { + parent::__construct($dsn, $user, $password, $options); + $this->setAttribute(\PDO::ATTR_STATEMENT_CLASS, array(__NAMESPACE__ . '\PDOStatement', array())); + } + /** * @override */ @@ -38,21 +45,9 @@ public function quote($value, $type = \PDO::PARAM_STR) { // Fix for a driver version terminating all values with null byte $val = rtrim($val, "\0"); - - // Freetds communicates with the server using UCS-2 (since v7.0). - // Freetds claims to convert from any given client charset to UCS-2 using iconv. Which should strip or replace unsupported chars. - // However in my experience, characters like 👍 (THUMBS UP SIGN, \u1F44D) end up in MSSQL shouting 'incorrect syntax' still. - $val = static::replaceNonUcs2Chars($val); return $val; } - - public static function replaceNonUcs2Chars($val) - { - // UCS-2 cannot represent unicode code points outside the BMP. (> 16 bits.) - // Replace those chars with the REPLACEMENT CHARACTER. - return preg_replace('/[^\x{0}-x{FFFF}]/u', "\xFFFD", $val); - } /** * @return bool PDO_DBlib transaction support diff --git a/Doctrine/DBAL/Driver/PDODblib/PDOStatement.php b/Doctrine/DBAL/Driver/PDODblib/PDOStatement.php new file mode 100644 index 0000000..c0ab74b --- /dev/null +++ b/Doctrine/DBAL/Driver/PDODblib/PDOStatement.php @@ -0,0 +1,36 @@ + 16 bits.) + * This function replaces those characters in a string with the REPLACEMENT CHARACTER. + * @param string $val + * @return string + */ + public static function replaceNonUcs2Chars($val) + { + return is_string($val) ? \preg_replace('/[^\x{0}-x{FFFF}]/u', "�", $val) : $val; + } + + +} From 73cb53ea59794607c3a5e15bc3af4edccba1f522 Mon Sep 17 00:00:00 2001 From: Roel Arents Date: Sun, 12 Jul 2015 04:09:06 +0200 Subject: [PATCH 3/4] fix a nasty stupid typo in the replacement regex --- Doctrine/DBAL/Driver/PDODblib/PDOStatement.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doctrine/DBAL/Driver/PDODblib/PDOStatement.php b/Doctrine/DBAL/Driver/PDODblib/PDOStatement.php index c0ab74b..e9b8835 100644 --- a/Doctrine/DBAL/Driver/PDODblib/PDOStatement.php +++ b/Doctrine/DBAL/Driver/PDODblib/PDOStatement.php @@ -29,7 +29,7 @@ public function bindValue($param, $value, $type = \PDO::PARAM_STR) */ public static function replaceNonUcs2Chars($val) { - return is_string($val) ? \preg_replace('/[^\x{0}-x{FFFF}]/u', "�", $val) : $val; + return is_string($val) ? \preg_replace('/[^\x{0}-\x{FFFF}]/u', "�", $val) : $val; } From 95ef1b88d8deffa3a7ef22cf01b301956d2afbf3 Mon Sep 17 00:00:00 2001 From: Roel Arents Date: Mon, 14 Mar 2016 20:53:13 +0100 Subject: [PATCH 4/4] fix null character should also be filtered --- .../DBAL/Driver/PDODblib/PDOStatement.php | 23 +++++++++++-------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/Doctrine/DBAL/Driver/PDODblib/PDOStatement.php b/Doctrine/DBAL/Driver/PDODblib/PDOStatement.php index e9b8835..122afc4 100644 --- a/Doctrine/DBAL/Driver/PDODblib/PDOStatement.php +++ b/Doctrine/DBAL/Driver/PDODblib/PDOStatement.php @@ -4,33 +4,36 @@ class PDOStatement extends \Doctrine\DBAL\Driver\PDOStatement { - + /** + * @var string Regex matching freetds non-supported characters. + */ + const FREETDS_INVALID_CHAR_REGEX = '/[^\x{1}-\x{FFFF}]/u'; + /** * {@inheritdoc} - * + * * Freetds communicates with the server using UCS-2 (since v7.0). - * Freetds claims to convert from any given client charset to UCS-2 using iconv. Which should strip or replace unsupported chars. + * Freetds claims to convert from any given client charset to UCS-2 using iconv. Which should strip or replace unsupported chars. * However in my experience, characters like 👍 (THUMBS UP SIGN, \u1F44D) still end up in Sqlsrv shouting '102 incorrect syntax'. + * As does the null character \u0000. + * * Upon binding a value, this function replaces the unsupported characters. */ public function bindValue($param, $value, $type = \PDO::PARAM_STR) { if ($type == \PDO::PARAM_STR) { - $value = static::replaceNonUcs2Chars($value); + $value = static::replaceUnsupportedFreetdsChars($value); } return parent::bindValue($param, $value, $type); } /** - * UCS-2 cannot represent Unicode code points outside the BMP. (> 16 bits.) - * This function replaces those characters in a string with the REPLACEMENT CHARACTER. + * This function replaces characters in a string unsupported by freetds with the REPLACEMENT CHARACTER. * @param string $val * @return string */ - public static function replaceNonUcs2Chars($val) + public static function replaceUnsupportedFreetdsChars($val) { - return is_string($val) ? \preg_replace('/[^\x{0}-\x{FFFF}]/u', "�", $val) : $val; + return is_string($val) ? \preg_replace(static::FREETDS_INVALID_CHAR_REGEX, "�", $val) : $val; } - - }